import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
from scipy.stats import sem
from scipy.stats import shapiro
from scipy.stats import normaltest
from scipy.stats import ttest_rel,ttest_ind,wilcoxon
import statsmodels.api as sm
import pylab as py
# Locations of the experiment data and the tables derived from it.
# NOTE: the absolute path below is overwritten by the relative path on the
# very next line, so it currently has no effect.
home_dir = '/Users/dbao/google_drive_db'+'/road_construction/data/2022_online/'
home_dir = '../../../2022_online/'
# map_dir and data_dir are not referenced again in the visible part of the file.
map_dir = 'active_map/'
data_dir = 'data/preprocessed'
# Figures are saved under out_dir; the CSV inputs are read from R_out_dir.
out_dir = home_dir + 'figures/cogsci_2022/'
R_out_dir = home_dir + 'R_analysis_data/'
# Puzzle-level table, ordered by subject and puzzle.
data_puzzle_level = pd.read_csv(R_out_dir + 'data.csv')
puzzleID_order_data = data_puzzle_level.sort_values(["subjects","puzzleID"])
# Choice-level table (presumably one row per action -- TODO confirm).
data_choice_level = pd.read_csv(R_out_dir + 'choice_level/choicelevel_data.csv')
# Keep only the rows with condition == 1 from both tables.
single_condition_data = puzzleID_order_data[puzzleID_order_data['condition']==1].copy()
single_condition_data = single_condition_data.reset_index()
# reset_index() without drop=True keeps the previous row labels in an "index"
# column; the per-subject loops further down sort on that column.
sc_data_choice_level = data_choice_level[data_choice_level['condition']==1].reset_index()
# How many undo events are multi-step sequences, and where do they end?
#   seq / noseq              : undo events whose last-undo row differs from /
#                              coincides with their first-undo row
#   seq2start / seqNot2start : multi-step sequences whose last-undo row has
#                              choice == 0 / choice > 2
#   tostart / not2start      : the same sequences split on currNumCities == 1
# NOTE(review): "choice > 2" is not the complement of "choice == 0", so rows
# with choice 1 or 2 land in neither counter -- confirm this is intended.
seq = noseq = 0
seq2start = seqNot2start = 0
tostart = not2start = 0
puzzle_ids = np.unique(sc_data_choice_level['puzzleID'])
for sub in range(100):
    dat_sbj = sc_data_choice_level[sc_data_choice_level['subjects'] == sub]
    dat_sbj = dat_sbj.sort_values(["puzzleID", "index"])
    for pzi in puzzle_ids:
        dat_sbj_pzi = dat_sbj[dat_sbj['puzzleID'] == pzi].reset_index()
        lastUndo_idx = dat_sbj_pzi[dat_sbj_pzi["lastUndo"] == 1].index
        firstUndo_idx = dat_sbj_pzi[dat_sbj_pzi["firstUndo"] == 1].index
        seq += np.sum(lastUndo_idx != firstUndo_idx)
        noseq += np.sum(lastUndo_idx == firstUndo_idx)
        # from here on only the rows that close a multi-step undo sequence
        lastUndo_idx = np.setdiff1d(lastUndo_idx, firstUndo_idx)
        end_choice = dat_sbj_pzi["choice"][lastUndo_idx]
        end_cities = dat_sbj_pzi["currNumCities"][lastUndo_idx]
        seq2start += np.sum(end_choice == 0)
        seqNot2start += np.sum(end_choice > 2)
        tostart += np.sum(end_cities == 1)
        not2start += np.sum(end_cities != 1)
for total in (seq, noseq, seq2start, seqNot2start, tostart, not2start):
    print(total)
# Output (seq, noseq, seq2start, seqNot2start, tostart, not2start):
# 2094 566 1103 956 1103 991
# Per subject: count undo events where the "choice" value on the row before the
# last undo equals (same) or differs from (different) the value on the row
# after it. Subjects with no such events are printed.
undo_same_diff = []
puzzle_ids = np.unique(sc_data_choice_level['puzzleID'])
for sub in range(100):
    dat_sbj = sc_data_choice_level[sc_data_choice_level['subjects'] == sub]
    dat_sbj = dat_sbj.sort_values(["puzzleID", "index"])
    undo_same_diff_puzzle = []
    for pzi in puzzle_ids:
        dat_sbj_pzi = dat_sbj[dat_sbj['puzzleID'] == pzi].reset_index()
        lastUndo_idx = dat_sbj_pzi[dat_sbj_pzi["lastUndo"] == 1].index
        choice = dat_sbj_pzi["choice"]
        choice_before = np.array(choice[lastUndo_idx - 1])
        choice_after = np.array(choice[lastUndo_idx + 1])
        same_puzzle = np.sum(choice_before == choice_after)
        diff_puzzle = np.sum(choice_before != choice_after)
        undo_same_diff_puzzle.append([same_puzzle, diff_puzzle])
    # collapse the per-puzzle [same, different] pairs into one pair per subject
    undo_same_diff_puzzle = np.sum(np.array(undo_same_diff_puzzle), axis=0)
    if np.sum(undo_same_diff_puzzle) == 0:
        print(sub)  # who does not undo
    undo_same_diff.append(undo_same_diff_puzzle)
undo_same_diff = np.array(undo_same_diff)
# Output (subjects who never undo): 20 25 31 46 53 66 67 76 84 97
# exclude some never undoing subjects
# np.where() returns a 1-tuple of row numbers; used inside the index it adds a
# leading axis of length 1, which squeeze() on the next line removes again.
# NOTE(review): squeeze() would also drop the row axis if only one subject
# remained -- confirm that cannot happen with this data.
undo_same_diff = undo_same_diff[np.where(np.sum(np.array(undo_same_diff),axis=1)!=0),:]
undo_same_diff = undo_same_diff.squeeze()
# per-subject proportions: every row is divided by its own total
undo_same_diff_p = undo_same_diff/ np.sum(undo_same_diff,axis = 1)[:,None]
# group mean of the [same, different] proportions (shown as the cell result)
np.mean(undo_same_diff_p,axis=0)
# Output: array([0.11857317, 0.88142683])
## check variance
# Sample standard deviation (ddof=1) of each proportion column. The two columns
# sum to 1 in every row, so both values come out identical.
print(np.std(undo_same_diff_p[:,0], ddof=1))
print(np.std(undo_same_diff_p[:,1], ddof=1))
## check normality
# Q-Q plot of each column against a normal distribution (line='s': standardized
# reference line), shown one after the other.
sm.qqplot(undo_same_diff_p[:,0], line='s')
py.show()
sm.qqplot(undo_same_diff_p[:,1], line='s')
py.show()
# Output: 0.14508927164018273 0.14508927164018273
# Paired Wilcoxon signed-rank test: "same" vs "different" proportion per subject.
stat1, p1 = wilcoxon(undo_same_diff_p[:,0],undo_same_diff_p[:,1])
print(stat1)
print(p1)
# Output: 82.0 2.101861284114228e-15
%matplotlib notebook
# Bar plot of the mean same/different proportions with a significance bracket.
mpl.rcParams['font.family'] = 'Arial'
mpl.rcParams['font.size'] = 11
fig = plt.figure(figsize=(6,4.5))
# plt.figure(figsize=(5,3.75))
# Error bars: np.std (ddof=0) divided by sqrt(number of rows).
bb = plt.bar(range(2), np.mean(undo_same_diff_p,axis=0),
color=[.7,.7,.7], edgecolor = 'k',
yerr=np.std(undo_same_diff_p,axis = 0)/np.sqrt(undo_same_diff_p.shape[0]))
plt.xticks([0,1], ['same','different'])
plt.ylabel('Proportion')
plt.xlabel('Undo target city') # next city after undoing
# Significance bracket drawn 0.1 above the taller bar, labelled with p1.
# NOTE: "{:f}" prints six decimals, so a very small p-value shows as 0.000000.
x1, x2 = 0,1
y, h, col = np.max([bb[0].get_height(),bb[1].get_height()]) + 0.1, 0.05, 'k'
plt.plot([x1, x1, x2, x2], [y, y+h, y+h, y], lw=1.5, c=col)
plt.text((x1+x2)*.5, y+h, r"$p = {:f}$".format(p1), ha='center', va='bottom', color=col, fontsize = 8)
fig.savefig(out_dir + 'proportion_undo_same_diff.png', dpi=600, bbox_inches='tight')
# Per subject: for undo events after which a different city was chosen, count
# whether "currMas" on the row after the last undo is lower / equal / higher
# than "currMas" on the row before the FIRST undo of that event.
undo_for_better = []
puzzle_ids = np.unique(sc_data_choice_level['puzzleID'])
for sub in range(100):
    dat_sbj = sc_data_choice_level[sc_data_choice_level['subjects'] == sub]
    dat_sbj = dat_sbj.sort_values(["puzzleID", "index"])
    undo_for_puzzle = []
    for pzi in puzzle_ids:
        dat_sbj_pzi = dat_sbj[dat_sbj['puzzleID'] == pzi].reset_index()
        firstUndo_idx = dat_sbj_pzi[dat_sbj_pzi["firstUndo"] == 1].index
        lastUndo_idx = dat_sbj_pzi[dat_sbj_pzi["lastUndo"] == 1].index
        path_bf_undo = dat_sbj_pzi["currMas"][firstUndo_idx - 1]  # state before the undo
        path_af_undo = dat_sbj_pzi["currMas"][lastUndo_idx + 1]  # state after the undo
        # True where the city chosen after the undo differs from the one before
        # it; this marks a different next choice, not necessarily a new path.
        choice = dat_sbj_pzi["choice"]
        idxx = np.array(choice[lastUndo_idx - 1]) != np.array(choice[lastUndo_idx + 1])
        if np.any(idxx):
            mas_change = np.array(path_af_undo[idxx]) - np.array(path_bf_undo[idxx])
            undo_for_puzzle.extend(np.sign(mas_change))
    undo_for_puzzle = np.array(undo_for_puzzle)
    undo_for_better.append([np.sum(undo_for_puzzle < 0),
                            np.sum(undo_for_puzzle == 0),
                            np.sum(undo_for_puzzle > 0)])
undo_for_better = np.array(undo_for_better)

# Drop subjects without any counted event, then turn counts into proportions.
has_events = np.where(np.sum(undo_for_better, axis=1) != 0)
undo_for_better = undo_for_better[has_events, :].squeeze()
undo_for_better_p = undo_for_better / np.sum(undo_for_better, axis=1)[:, None]

# Paired Wilcoxon tests: worse vs same, same vs better.
stat12, p12 = wilcoxon(undo_for_better_p[:, 0], undo_for_better_p[:, 1])
stat23, p23 = wilcoxon(undo_for_better_p[:, 1], undo_for_better_p[:, 2])
print(stat12)
print(p12)
print(stat23)
print(p23)
# Output (stat12, p12, stat23, p23):
# 241.5 1.574357589671037e-10 728.5 2.4395252313658338e-06
%matplotlib notebook
# Bar plot of the mean worse/same/better proportions with two brackets.
mpl.rcParams['font.family'] = 'Arial'
mpl.rcParams['font.size'] = 11
fig = plt.figure(figsize=(6,4.5))
# plt.figure(figsize=(5,3.75))
# Error bars: np.std (ddof=0) divided by sqrt(number of rows).
bb = plt.bar(range(3), np.mean(undo_for_better_p,axis=0),
color=[.7,.7,.7], edgecolor = 'k',
yerr=np.std(undo_for_better_p,axis = 0)/np.sqrt(undo_for_better_p.shape[0]))
plt.ylabel('Maximum achivable score after undo')
plt.xticks([0,1,2], ['Worse','Same','Better'])
# Bracket between bars 0 and 1, labelled with p12.
# NOTE: "{:f}" prints six decimals, so a very small p-value shows as 0.000000.
x1, x2 = 0,1
y, h, col = np.max([bb[0].get_height(),bb[1].get_height()]) + 0.1, 0.05, 'k'
plt.plot([x1, x1, x2, x2], [y, y+h, y+h, y], lw=1.5, c=col)
plt.text((x1+x2)*.5, y+h, r"$p = {:f}$".format(p12), ha='center', va='bottom', color=col, fontsize = 8)
# Bracket between bars 1 and 2, labelled with p23.
x1, x2 = 1,2
y, h, col = np.max([bb[2].get_height(),bb[1].get_height()]) + 0.1, 0.05, 'k'
plt.plot([x1, x1, x2, x2], [y, y+h, y+h, y], lw=1.5, c=col)
plt.text((x1+x2)*.5, y+h, r"$p = {:f}$".format(p23), ha='center', va='bottom', color=col, fontsize = 8)
fig.savefig(out_dir + 'proportion_undo_same_better_worse.png', dpi=600, bbox_inches='tight')
# Per subject: for undo events after which a different city was chosen, count
# whether "currMas" on the row after the last undo is lower / equal / higher
# than "currMas" on the row before the LAST undo of that event.
undo_for_better = []
puzzle_ids = np.unique(sc_data_choice_level['puzzleID'])
for sub in range(100):
    dat_sbj = sc_data_choice_level[sc_data_choice_level['subjects'] == sub]
    dat_sbj = dat_sbj.sort_values(["puzzleID", "index"])
    undo_for_puzzle = []
    for pzi in puzzle_ids:
        dat_sbj_pzi = dat_sbj[dat_sbj['puzzleID'] == pzi].reset_index()
        firstUndo_idx = dat_sbj_pzi[dat_sbj_pzi["firstUndo"] == 1].index  # not used below
        lastUndo_idx = dat_sbj_pzi[dat_sbj_pzi["lastUndo"] == 1].index
        path_bf_undo = dat_sbj_pzi["currMas"][lastUndo_idx - 1]  # state before the undo
        path_af_undo = dat_sbj_pzi["currMas"][lastUndo_idx + 1]  # state after the undo
        # True where the city chosen after the undo differs from the one before
        # it; this marks a different next choice, not necessarily a new path.
        choice = dat_sbj_pzi["choice"]
        idxx = np.array(choice[lastUndo_idx - 1]) != np.array(choice[lastUndo_idx + 1])
        if np.any(idxx):
            mas_change = np.array(path_af_undo[idxx]) - np.array(path_bf_undo[idxx])
            undo_for_puzzle.extend(np.sign(mas_change))
    undo_for_puzzle = np.array(undo_for_puzzle)
    undo_for_better.append([np.sum(undo_for_puzzle < 0),
                            np.sum(undo_for_puzzle == 0),
                            np.sum(undo_for_puzzle > 0)])
undo_for_better = np.array(undo_for_better)

# Drop subjects without any counted event, then turn counts into proportions.
has_events = np.where(np.sum(undo_for_better, axis=1) != 0)
undo_for_better = undo_for_better[has_events, :].squeeze()
undo_for_better_p = undo_for_better / np.sum(undo_for_better, axis=1)[:, None]

# Paired Wilcoxon tests: worse vs same, same vs better.
stat12, p12 = wilcoxon(undo_for_better_p[:, 0], undo_for_better_p[:, 1])
stat23, p23 = wilcoxon(undo_for_better_p[:, 1], undo_for_better_p[:, 2])
print(stat12)
print(p12)
print(stat23)
print(p23)
# Output (stat12, p12, stat23, p23):
# 359.0 8.348934873555565e-08 1358.5 0.2096610964042439
%matplotlib inline
# Bar plot of the mean worse/same/better proportions with two brackets.
mpl.rcParams['font.family'] = 'Arial'
mpl.rcParams['font.size'] = 11
fig = plt.figure(figsize=(6,4.5))
# Error bars: np.std (ddof=0) divided by sqrt(number of rows).
bb = plt.bar(range(3), np.mean(undo_for_better_p,axis=0),
color=[.7,.7,.7], edgecolor = 'k',
yerr=np.std(undo_for_better_p,axis = 0)/np.sqrt(undo_for_better_p.shape[0]))
plt.ylabel('Maximum achivable score after undo')
plt.xticks([0,1,2], ['Worse','Same','Better'])
# Bracket between bars 0 and 1, labelled with p12.
# NOTE: "{:f}" prints six decimals, so a very small p-value shows as 0.000000.
x1, x2 = 0,1
y, h, col = np.max([bb[0].get_height(),bb[1].get_height()]) + 0.1, 0.05, 'k'
plt.plot([x1, x1, x2, x2], [y, y+h, y+h, y], lw=1.5, c=col)
plt.text((x1+x2)*.5, y+h, r"$p = {:f}$".format(p12), ha='center', va='bottom', color=col, fontsize = 8)
# Bracket between bars 1 and 2, labelled with p23.
x1, x2 = 1,2
y, h, col = np.max([bb[2].get_height(),bb[1].get_height()]) + 0.1, 0.05, 'k'
plt.plot([x1, x1, x2, x2], [y, y+h, y+h, y], lw=1.5, c=col)
plt.text((x1+x2)*.5, y+h, r"$p = {:f}$".format(p23), ha='center', va='bottom', color=col, fontsize = 8)
fig.savefig(out_dir + 'proportion_MAS_same_better_worse.png', dpi=600, bbox_inches='tight')
# Per subject: split last-undo rows into those with choice == 0 ("undo to
# start") and the rest, and count for each group whether the "choice" value
# before and after the undo is the same or different.
undo_same_diff = []  # reset only; not filled in this cell
undo_4condi = []  # (undo2start, same), (undo2start, different), (undoNot2start, same), (undoNot2start, different)
puzzle_ids = np.unique(sc_data_choice_level['puzzleID'])
for sub in range(100):
    dat_sbj = sc_data_choice_level[sc_data_choice_level['subjects'] == sub]
    dat_sbj = dat_sbj.sort_values(["puzzleID", "index"])
    undo_same_diff_puzzle = []  # not filled in this cell
    undo_4condi_puzzle = []
    for pzi in puzzle_ids:
        same_puzzle = 0  # not updated in this cell
        diff_puzzle = 0  # not updated in this cell
        dat_sbj_pzi = dat_sbj[dat_sbj['puzzleID'] == pzi].reset_index()
        lastUndo_idx = dat_sbj_pzi[dat_sbj_pzi["lastUndo"] == 1].index
        choice_idxx = dat_sbj_pzi[dat_sbj_pzi["choice"] == 0].index
        undo2start_idx = np.intersect1d(choice_idxx, lastUndo_idx)
        undoNot2start_idx = np.setdiff1d(lastUndo_idx, undo2start_idx)
        choice = dat_sbj_pzi["choice"]
        bf_2s = np.array(choice[undo2start_idx - 1])
        af_2s = np.array(choice[undo2start_idx + 1])
        bf_n2s = np.array(choice[undoNot2start_idx - 1])
        af_n2s = np.array(choice[undoNot2start_idx + 1])
        u2s_s = np.sum(bf_2s == af_2s)
        u2s_d = np.sum(bf_2s != af_2s)
        uns_s = np.sum(bf_n2s == af_n2s)
        uns_d = np.sum(bf_n2s != af_n2s)
        undo_4condi_puzzle.append([u2s_s, u2s_d, uns_s, uns_d])
    # collapse the per-puzzle counts into one row of four counts per subject
    undo_4condi_puzzle = np.sum(np.array(undo_4condi_puzzle), axis=0)
    if np.sum(undo_4condi_puzzle) == 0:
        print(sub)  # who does not undo
    undo_4condi.append(undo_4condi_puzzle)
undo_4condi = np.array(undo_4condi)
# Output (subjects who never undo): 20 25 31 46 53 66 67 76 84 97
# Display the raw counts, then convert every row to proportions of its total.
# Rows whose total is 0 (the never-undoing subjects printed by the loop) are
# divided 0/0 and become NaN.
undo_4condi
undo_4condi_p = undo_4condi/ np.sum(undo_4condi,axis = 1)[:,None]
# print(undo_4condi)
# Warning emitted by the cell above:
# /Users/dongjaekim/opt/anaconda3/envs/base37/lib/python3.7/site-packages/ipykernel_launcher.py:2: RuntimeWarning: invalid value encountered in true_divide
# Paired Wilcoxon tests: same vs different within "undo to start" (columns 0/1)
# and within "undo not to start" (columns 2/3).
# undo_4condi_p holds all-NaN rows for subjects who never undo. Those rows are
# dropped first so that the result does not depend on how the installed SciPy
# version treats NaN input.
valid_rows = ~np.isnan(undo_4condi_p).any(axis=1)
stat12, p12 = wilcoxon(undo_4condi_p[valid_rows, 0], undo_4condi_p[valid_rows, 1])
stat34, p34 = wilcoxon(undo_4condi_p[valid_rows, 2], undo_4condi_p[valid_rows, 3])
print(stat12)
print(p12)
print(stat34)
print(p34)
# Output recorded in the original run (stat12, p12, stat34, p34):
# 20.5 3.4547372556015567e-16 105.5 4.563751503697428e-16
%matplotlib notebook
# Bar plot of the mean proportion in each of the four undo categories.
mpl.rcParams['font.family'] = 'Arial'
mpl.rcParams['font.size'] = 11
plt.figure(figsize=(6,4.5))
# The NaN-aware mean/std skip subjects with NaN rows, so the standard error
# must divide by the number of non-NaN values per column, not by all rows.
n_valid = np.sum(~np.isnan(undo_4condi_p), axis=0)
plt.bar(range(4), np.nanmean(undo_4condi_p, axis=0),
        color=[.7,.7,.7], edgecolor='k',
        yerr=np.nanstd(undo_4condi_p, axis=0) / np.sqrt(n_valid))
plt.xticks([0,0.5,1,2,2.5,3], ['same','\nUndo to start','different','same','\nUndo not to start','different'])
([<matplotlib.axis.XTick at 0x7fed18e1e210>, <matplotlib.axis.XTick at 0x7fed2be9f5d0>, <matplotlib.axis.XTick at 0x7fed18d784d0>, <matplotlib.axis.XTick at 0x7fed2bea2350>, <matplotlib.axis.XTick at 0x7fed2bea2b50>, <matplotlib.axis.XTick at 0x7fed2beb34d0>], [Text(0.0, 0, 'same'), Text(0.5, 0, '\nUndo to start'), Text(1.0, 0, 'different'), Text(2.0, 0, 'same'), Text(2.5, 0, '\nUndo not to start'), Text(3.0, 0, 'different')])
# Per subject, separately for last-undo rows with choice == 0 ("undo to start")
# and the remaining last-undo rows:
#   * count same/different "choice" before vs after the undo (undo_4condi)
#   * for the "different" events, count whether "currMas" after the undo is
#     lower / equal / higher than before it (undo_better_2s / undo_better_n2s)
undo_same_diff = []  # reset only; not filled in this cell
undo_4condi = []  # (undo2start, same), (undo2start, different), (undoNot2start, same), (undoNot2start, different)
undo_better_2s = []
undo_better_n2s = []
puzzle_ids = np.unique(sc_data_choice_level['puzzleID'])
for sub in range(100):
    dat_sbj = sc_data_choice_level[sc_data_choice_level['subjects'] == sub]
    dat_sbj = dat_sbj.sort_values(["puzzleID", "index"])
    undo_same_diff_puzzle = []  # not filled in this cell
    undo_4condi_puzzle = []
    undo_better_2s_puzzle = []
    undo_better_n2s_puzzle = []
    for pzi in puzzle_ids:
        same_puzzle = 0  # not updated in this cell
        diff_puzzle = 0  # not updated in this cell
        dat_sbj_pzi = dat_sbj[dat_sbj['puzzleID'] == pzi].reset_index()
        lastUndo_idx = dat_sbj_pzi[dat_sbj_pzi["lastUndo"] == 1].index
        choice_idxx = dat_sbj_pzi[dat_sbj_pzi["choice"] == 0].index
        undo2start_idx = np.intersect1d(choice_idxx, lastUndo_idx)
        undoNot2start_idx = np.setdiff1d(lastUndo_idx, undo2start_idx)
        choice = dat_sbj_pzi["choice"]
        mas = dat_sbj_pzi["currMas"]

        # undo to start
        path_bf_2s_undo = mas[undo2start_idx - 1]  # state before the undo
        path_af_2s_undo = mas[undo2start_idx + 1]  # state after the undo
        bf_2s = np.array(choice[undo2start_idx - 1])
        af_2s = np.array(choice[undo2start_idx + 1])
        idxx = bf_2s != af_2s
        if np.any(idxx):
            undo_better_2s_puzzle.extend(
                np.sign(np.array(path_af_2s_undo[idxx]) - np.array(path_bf_2s_undo[idxx])))

        # undo not to start
        path_bf_n2s_undo = mas[undoNot2start_idx - 1]  # state before the undo
        path_af_n2s_undo = mas[undoNot2start_idx + 1]  # state after the undo
        bf_n2s = np.array(choice[undoNot2start_idx - 1])
        af_n2s = np.array(choice[undoNot2start_idx + 1])
        changed_n2s = bf_n2s != af_n2s
        if np.any(changed_n2s):
            idxx = changed_n2s
            undo_better_n2s_puzzle.extend(
                np.sign(np.array(path_af_n2s_undo[idxx]) - np.array(path_bf_n2s_undo[idxx])))

        u2s_s = np.sum(bf_2s == af_2s)
        u2s_d = np.sum(bf_2s != af_2s)
        uns_s = np.sum(bf_n2s == af_n2s)
        uns_d = np.sum(changed_n2s)
        undo_4condi_puzzle.append([u2s_s, u2s_d, uns_s, uns_d])

    undo_better_2s_puzzle = np.array(undo_better_2s_puzzle)
    undo_better_2s.append([np.sum(undo_better_2s_puzzle < 0),
                           np.sum(undo_better_2s_puzzle == 0),
                           np.sum(undo_better_2s_puzzle > 0)])
    undo_better_n2s_puzzle = np.array(undo_better_n2s_puzzle)
    undo_better_n2s.append([np.sum(undo_better_n2s_puzzle < 0),
                            np.sum(undo_better_n2s_puzzle == 0),
                            np.sum(undo_better_n2s_puzzle > 0)])
    undo_4condi_puzzle = np.sum(np.array(undo_4condi_puzzle), axis=0)
    if np.sum(undo_4condi_puzzle) == 0:
        print(sub)  # who does not undo
    undo_4condi.append(undo_4condi_puzzle)
undo_4condi = np.array(undo_4condi)
undo_better_2s = np.array(undo_better_2s)
undo_better_n2s = np.array(undo_better_n2s)
# Output (subjects who never undo): 20 25 31 46 53 66 67 76 84 97
# "Undo to start" counts: drop subjects whose row sums to 0, then convert each
# remaining row to proportions. (np.where inside the index adds a leading axis
# of length 1 that squeeze() removes again.)
undo_better_2s = undo_better_2s[np.where(np.sum(np.array(undo_better_2s),axis=1)!=0),:]
undo_better_2s = undo_better_2s.squeeze()
undo_better_2s_p = undo_better_2s/ np.sum(undo_better_2s,axis = 1)[:,None]
# Same treatment for the "undo not to start" counts. The two filters are
# independent, so the two arrays may keep different subjects.
undo_better_n2s = undo_better_n2s[np.where(np.sum(np.array(undo_better_n2s),axis=1)!=0),:]
undo_better_n2s = undo_better_n2s.squeeze()
undo_better_n2s_p = undo_better_n2s/ np.sum(undo_better_n2s,axis = 1)[:,None]
%matplotlib notebook
# Two side-by-side bar plots (undo to start / undo not to start) of the mean
# worse/same/better proportions, each with two significance brackets.
mpl.rcParams['font.family'] = 'Arial'
mpl.rcParams['font.size'] = 11
fig , axes = plt.subplots(1,2,figsize=(6,4.5))
# Left panel: undo to start.
bb = axes[0].bar(range(3), np.nanmean(undo_better_2s_p,axis=0),
color=[.7,.7,.7], edgecolor = 'k',
yerr=np.nanstd(undo_better_2s_p,axis = 0)/np.sqrt(undo_better_2s_p.shape[0]))
# NOTE(review): passing the labels as the second positional argument of
# set_xticks presumably needs a recent Matplotlib -- confirm the version.
axes[0].set_xticks([0,1,2], ['worse','same\nUndo to start','better'])
axes[0].set_ylim(0,.7)
# Paired Wilcoxon tests: worse vs same, same vs better.
stat12, p12 = wilcoxon(undo_better_2s_p[:,0],undo_better_2s_p[:,1])
stat23, p23 = wilcoxon(undo_better_2s_p[:,1],undo_better_2s_p[:,2])
print(stat12)
print(p12)
print(stat23)
print(p23)
# Brackets 0.1 above the taller bar of each pair, labelled with the p-value.
x1, x2 = 0,1
y, h, col = np.max([bb[0].get_height(),bb[1].get_height()]) + 0.1, 0.05, 'k'
axes[0].plot([x1, x1, x2, x2], [y, y+h, y+h, y], lw=1.5, c=col)
axes[0].text((x1+x2)*.5, y+h, r"$p = {:f}$".format(p12), ha='center', va='bottom', color=col, fontsize = 8)
x1, x2 = 1,2
y, h, col = np.max([bb[1].get_height(),bb[2].get_height()]) + 0.1, 0.05, 'k'
axes[0].plot([x1, x1, x2, x2], [y, y+h, y+h, y], lw=1.5, c=col)
axes[0].text((x1+x2)*.5, y+h, r"$p = {:f}$".format(p23), ha='center', va='bottom', color=col, fontsize = 8)
# Right panel: undo not to start.
bb2 = axes[1].bar(range(3), np.nanmean(undo_better_n2s_p,axis=0),
color=[.7,.7,.7], edgecolor = 'k',
yerr=np.nanstd(undo_better_n2s_p,axis = 0)/np.sqrt(undo_better_n2s_p.shape[0]))
axes[1].set_xticks([0,1,2], ['worse','same\nUndo not to start','better'])
axes[1].set_ylim(0,.7)
# stat12/p12/stat23/p23 are overwritten here with the right-panel results.
stat12, p12 = wilcoxon(undo_better_n2s_p[:,0],undo_better_n2s_p[:,1])
stat23, p23 = wilcoxon(undo_better_n2s_p[:,1],undo_better_n2s_p[:,2])
print(stat12)
print(p12)
print(stat23)
print(p23)
# Brackets for the right panel.
x1, x2 = 0,1
y, h, col = np.max([bb2[0].get_height(),bb2[1].get_height()]) + 0.1, 0.05, 'k'
axes[1].plot([x1, x1, x2, x2], [y, y+h, y+h, y], lw=1.5, c=col)
axes[1].text((x1+x2)*.5, y+h, r"$p = {:f}$".format(p12), ha='center', va='bottom', color=col, fontsize = 8)
x1, x2 = 1,2
y, h, col = np.max([bb2[1].get_height(),bb2[2].get_height()]) + 0.1, 0.05, 'k'
axes[1].plot([x1, x1, x2, x2], [y, y+h, y+h, y], lw=1.5, c=col)
axes[1].text((x1+x2)*.5, y+h, r"$p = {:f}$".format(p23), ha='center', va='bottom', color=col, fontsize = 8)
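# Output, left panel (stat12, p12, stat23, p23):
# 479.0 0.014719115686298584 431.0 2.721493900890657e-05
# Output, right panel (stat12, p12, stat23, p23):
# 330.0 2.0015475895461355e-08 1189.0 0.21222251859233165
# Cell result: Text(1.5, 0.5927749883863996, '$p = 0.212223$')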
# Same four-way count as for all undo events, but restricted to multi-step undo
# sequences: last-undo rows that are also first-undo rows are removed first.
undo_same_diff = []  # reset only; not filled in this cell
undo_4condi = []  # (undo2start, same), (undo2start, different), (undoNot2start, same), (undoNot2start, different)
puzzle_ids = np.unique(sc_data_choice_level['puzzleID'])
for sub in range(100):
    dat_sbj = sc_data_choice_level[sc_data_choice_level['subjects'] == sub]
    dat_sbj = dat_sbj.sort_values(["puzzleID", "index"])
    undo_same_diff_puzzle = []  # not filled in this cell
    undo_4condi_puzzle = []
    for pzi in puzzle_ids:
        same_puzzle = 0  # not updated in this cell
        diff_puzzle = 0  # not updated in this cell
        dat_sbj_pzi = dat_sbj[dat_sbj['puzzleID'] == pzi].reset_index()
        firstUndo_idx = dat_sbj_pzi[dat_sbj_pzi["firstUndo"] == 1].index
        lastUndo_idx = dat_sbj_pzi[dat_sbj_pzi["lastUndo"] == 1].index
        # only sequential undos are taken into account
        lastUndo_idx = np.setdiff1d(lastUndo_idx, firstUndo_idx)
        choice_idxx = dat_sbj_pzi[dat_sbj_pzi["choice"] == 0].index
        undo2start_idx = np.intersect1d(choice_idxx, lastUndo_idx)
        undoNot2start_idx = np.setdiff1d(lastUndo_idx, undo2start_idx)
        choice = dat_sbj_pzi["choice"]
        bf_2s = np.array(choice[undo2start_idx - 1])
        af_2s = np.array(choice[undo2start_idx + 1])
        bf_n2s = np.array(choice[undoNot2start_idx - 1])
        af_n2s = np.array(choice[undoNot2start_idx + 1])
        u2s_s = np.sum(bf_2s == af_2s)
        u2s_d = np.sum(bf_2s != af_2s)
        uns_s = np.sum(bf_n2s == af_n2s)
        uns_d = np.sum(bf_n2s != af_n2s)
        undo_4condi_puzzle.append([u2s_s, u2s_d, uns_s, uns_d])
    # collapse the per-puzzle counts into one row of four counts per subject
    undo_4condi_puzzle = np.sum(np.array(undo_4condi_puzzle), axis=0)
    if np.sum(undo_4condi_puzzle) == 0:
        print(sub)  # who does not undo
    undo_4condi.append(undo_4condi_puzzle)
undo_4condi = np.array(undo_4condi)
# Output (subjects without sequential undos): 8 12 20 25 26 27 28 31 33 34 46 53 57 66 67 76 84 93 94 97
# Display the raw counts, then convert every row to proportions of its total.
# Rows whose total is 0 (the subjects printed by the loop) are divided 0/0 and
# become NaN.
undo_4condi
undo_4condi_p = undo_4condi/ np.sum(undo_4condi,axis = 1)[:,None]
# print(undo_4condi)
# Warning emitted by the cell above:
# /Users/dongjaekim/opt/anaconda3/envs/base37/lib/python3.7/site-packages/ipykernel_launcher.py:2: RuntimeWarning: invalid value encountered in true_divide
# Paired Wilcoxon tests: same vs different within "undo to start" (columns 0/1)
# and within "undo not to start" (columns 2/3), sequential undos only.
# undo_4condi_p holds all-NaN rows for subjects without such undos. Those rows
# are dropped first so that the result does not depend on how the installed
# SciPy version treats NaN input.
valid_rows = ~np.isnan(undo_4condi_p).any(axis=1)
stat12, p12 = wilcoxon(undo_4condi_p[valid_rows, 0], undo_4condi_p[valid_rows, 1])
stat34, p34 = wilcoxon(undo_4condi_p[valid_rows, 2], undo_4condi_p[valid_rows, 3])
print(stat12)
print(p12)
print(stat34)
print(p34)
# Output recorded in the original run (stat12, p12, stat34, p34):
# 3.5 4.245167166785722e-17 11.0 5.413752964972779e-17
%matplotlib notebook
# Bar plot of the mean proportion in each of the four undo categories
# (sequential undos only).
mpl.rcParams['font.family'] = 'Arial'
mpl.rcParams['font.size'] = 11
plt.figure(figsize=(6,4.5))
# The NaN-aware mean/std skip subjects with NaN rows, so the standard error
# must divide by the number of non-NaN values per column, not by all rows.
n_valid = np.sum(~np.isnan(undo_4condi_p), axis=0)
plt.bar(range(4), np.nanmean(undo_4condi_p, axis=0),
        color=[.7,.7,.7], edgecolor='k',
        yerr=np.nanstd(undo_4condi_p, axis=0) / np.sqrt(n_valid))
plt.xticks([0,0.5,1,2,2.5,3], ['same','\nUndo to start','different','same','\nUndo not to start','different'])
# NOTE(review): the bars are proportions of same/different choices, so this
# label looks copied from the MAS plot -- confirm the intended axis label.
plt.ylabel('Maximum achivable score after undo')
# plt.xlabel('After undoing')
# Cell result: Text(0, 0.5, 'Maximum achivable score after undo')
# Per subject: for undo events after which a different city was chosen, count
# whether "currNos" on the row after the last undo is lower / equal / higher
# than on the row before it.
undo_nos = []
puzzle_ids = np.unique(sc_data_choice_level['puzzleID'])
for sub in range(100):
    dat_sbj = sc_data_choice_level[sc_data_choice_level['subjects'] == sub]
    dat_sbj = dat_sbj.sort_values(["puzzleID", "index"])
    undo_nos_puzzle = []
    for pzi in puzzle_ids:
        dat_sbj_pzi = dat_sbj[dat_sbj['puzzleID'] == pzi].reset_index()
        firstUndo_idx = dat_sbj_pzi[dat_sbj_pzi["firstUndo"] == 1].index  # not used below
        lastUndo_idx = dat_sbj_pzi[dat_sbj_pzi["lastUndo"] == 1].index
        path_bf_undo = dat_sbj_pzi["currNos"][lastUndo_idx - 1]  # currNos before the undo
        path_af_undo = dat_sbj_pzi["currNos"][lastUndo_idx + 1]  # currNos after the undo
        # True where the city chosen after the undo differs from the one before
        # it; this marks a different next choice, not necessarily a new path.
        choice = dat_sbj_pzi["choice"]
        idxx = np.array(choice[lastUndo_idx - 1]) != np.array(choice[lastUndo_idx + 1])
        if np.any(idxx):
            nos_change = np.array(path_af_undo[idxx]) - np.array(path_bf_undo[idxx])
            undo_nos_puzzle.extend(np.sign(nos_change))
    undo_nos_puzzle = np.array(undo_nos_puzzle)
    undo_nos.append([np.sum(undo_nos_puzzle < 0),
                     np.sum(undo_nos_puzzle == 0),
                     np.sum(undo_nos_puzzle > 0)])
undo_nos = np.array(undo_nos)

# Drop subjects without any counted event, then turn counts into proportions.
has_events = np.where(np.sum(undo_nos, axis=1) != 0)
undo_nos = undo_nos[has_events, :].squeeze()
undo_nos_p = undo_nos / np.sum(undo_nos, axis=1)[:, None]

# Paired Wilcoxon test: decreased (column 0) vs increased (column 2).
stat13, p13 = wilcoxon(undo_nos_p[:, 0], undo_nos_p[:, 2])
print(stat13)
print(p13)
# Output (stat13, p13): 982.0 0.008342293270650666
%matplotlib notebook
# Bar plot of the mean decreased/same/increased proportions of "currNos" after
# an undo, with one bracket comparing "Decreased" and "Increased".
mpl.rcParams['font.family'] = 'Arial'
mpl.rcParams['font.size'] = 11
fig = plt.figure(figsize=(6,4.5))
# Error bars: np.std (ddof=0) divided by sqrt(number of rows).
bb = plt.bar(range(3), np.mean(undo_nos_p, axis=0),
             color=[.7,.7,.7], edgecolor='k',
             yerr=np.std(undo_nos_p, axis=0) / np.sqrt(undo_nos_p.shape[0]))
plt.ylabel('Number of optimal solutions after undo')
plt.xticks([0,1,2], ['Decreased','Same','Increased'])
# means they result in less confusing path?
# less difficult path?
# The bracket spans bars 0 and 2, so it has to clear all three bars.
x1, x2 = 0, 2
y, h, col = np.max([bar.get_height() for bar in bb]) + 0.1, 0.05, 'k'
plt.plot([x1, x1, x2, x2], [y, y+h, y+h, y], lw=1.5, c=col)
# The test for this figure compares columns 0 and 2 and stores its p-value in
# p13; p12 is a leftover from an earlier analysis.
plt.text((x1+x2)*.5, y+h, r"$p = {:f}$".format(p13), ha='center', va='bottom', color=col, fontsize = 8)
fig.savefig(out_dir + 'proportion_NOS_same_better_worse.png', dpi=600, bbox_inches='tight')
# Per subject: for undo events after which a different city was chosen, compare
# "leftover" on the row before the first undo of the event with "leftover" on
# the last row before the next undo event starts (or on the final row of the
# puzzle for the last event), and count decreases / ties / increases.
undo_budget = []
puzzle_ids = np.unique(sc_data_choice_level['puzzleID'])
for sub in range(100):
    dat_sbj = sc_data_choice_level[sc_data_choice_level['subjects'] == sub]
    dat_sbj = dat_sbj.sort_values(["puzzleID", "index"])
    undo_budget_puzzle = []
    for pzi in puzzle_ids:
        dat_sbj_pzi = dat_sbj[dat_sbj['puzzleID'] == pzi].reset_index()
        firstUndo_idx = dat_sbj_pzi[dat_sbj_pzi["firstUndo"] == 1].index
        lastUndo_idx = dat_sbj_pzi[dat_sbj_pzi["lastUndo"] == 1].index
        # pd.Int64Index was removed in pandas 2.0; pd.Index builds the same
        # one-element integer index on old and new pandas versions.
        submit_idx = pd.Index([len(dat_sbj_pzi) - 1])
        # end row of each attempt: the row before the next first-undo, and the
        # final row of the puzzle for the last attempt
        t_idx = (firstUndo_idx[1:] - 1).to_list()
        t_idx.append(submit_idx.item())
        path_bf_undo = dat_sbj_pzi["leftover"][firstUndo_idx - 1]
        path_af_undo = dat_sbj_pzi["leftover"][t_idx]
        # True where the city chosen after the undo differs from the one before
        # it; this marks a different next choice, not necessarily a new path.
        choice = dat_sbj_pzi["choice"]
        idxx = np.array(choice[lastUndo_idx - 1]) != np.array(choice[lastUndo_idx + 1])
        if np.any(idxx):
            budget_change = np.array(path_af_undo[idxx]) - np.array(path_bf_undo[idxx])
            undo_budget_puzzle.extend(np.sign(budget_change))
    undo_budget_puzzle = np.array(undo_budget_puzzle)
    undo_budget.append([np.sum(undo_budget_puzzle < 0),
                        np.sum(undo_budget_puzzle == 0),
                        np.sum(undo_budget_puzzle > 0)])
undo_budget = np.array(undo_budget)
# exclude some never undoing subjects
undo_budget = undo_budget[np.where(np.sum(np.array(undo_budget),axis=1)!=0),:]
undo_budget = undo_budget.squeeze()
# keep only the "decreased" and "increased" columns before normalizing
undo_budget = undo_budget[:,(0,2)]
undo_budget_p = undo_budget/ np.sum(undo_budget,axis = 1)[:,None]
# Paired Wilcoxon test: decreased vs increased.
stat12, p12 = wilcoxon(undo_budget_p[:,0],undo_budget_p[:,1])
print(stat12)
print(p12)
# Output (stat12, p12): 327.5 1.7566375349786966e-11
%matplotlib notebook
# Bar plot of the mean decreased/increased proportions in undo_budget_p.
mpl.rcParams['font.family'] = 'Arial'
mpl.rcParams['font.size'] = 11
fig = plt.figure(figsize=(6,4.5))
# plt.figure(figsize=(5,3.75))
# Error bars: np.std (ddof=0) divided by sqrt(number of rows).
bb = plt.bar(range(2), np.mean(undo_budget_p,axis=0),
color=[.7,.7,.7], edgecolor = 'k',
yerr=np.std(undo_budget_p,axis = 0)/np.sqrt(undo_budget_p.shape[0]))
# NOTE(review): undo_budget_p is derived from the "leftover" column, yet the
# axis label and the output file name refer to the number of optimal
# solutions -- looks copied from the NOS plot; confirm the intended wording.
plt.ylabel('Number of optimal solutions after undo')
plt.xticks([0,1], ['Decreased','Increased']) # increased nos means that its much easier?
# Significance bracket 0.1 above the taller bar, labelled with p12.
# NOTE: "{:f}" prints six decimals, so a very small p-value shows as 0.000000.
x1, x2 = 0,1
y, h, col = np.max([bb[0].get_height(),bb[1].get_height()]) + 0.1, 0.05, 'k'
plt.plot([x1, x1, x2, x2], [y, y+h, y+h, y], lw=1.5, c=col)
plt.text((x1+x2)*.5, y+h, r"$p = {:f}$".format(p12), ha='center', va='bottom', color=col, fontsize = 8)
fig.savefig(out_dir + 'proportion_Nos_increased_decreased.png', dpi=600, bbox_inches='tight')
# Per subject, from the "RT" column around every undo event:
#   RT_around      : mean RT on the row before the first undo and on the row
#                    after the last undo (NaN pair if the subject never undoes)
#   RT_diff_around : for events followed by a different choice, how often the
#                    RT after the undo is lower / higher than before it
RT_diff_around = []
RT_around = []
puzzle_ids = np.unique(sc_data_choice_level['puzzleID'])
for sub in range(100):
    dat_sbj = sc_data_choice_level[sc_data_choice_level['subjects'] == sub]
    dat_sbj = dat_sbj.sort_values(["puzzleID", "index"])
    RT_around_for_puzzle = np.empty((0,2))
    RT_diff_around_for_puzzle = []
    for pzi in puzzle_ids:
        dat_sbj_pzi = dat_sbj[dat_sbj['puzzleID'] == pzi].reset_index()
        firstUndo_idx = dat_sbj_pzi[dat_sbj_pzi["firstUndo"] == 1].index
        path_bf_undo = dat_sbj_pzi["RT"][firstUndo_idx - 1]  # RT before the undo
        lastUndo_idx = dat_sbj_pzi[dat_sbj_pzi["lastUndo"] == 1].index
        path_af_undo = dat_sbj_pzi["RT"][lastUndo_idx + 1]  # RT after the undo
        # compare by value ("is not 0" tested object identity with a literal)
        if len(firstUndo_idx) > 0:
            before_after = np.column_stack((np.array(path_bf_undo), np.array(path_af_undo)))
            RT_around_for_puzzle = np.concatenate((RT_around_for_puzzle, before_after), axis=0)
        # True where the city chosen after the undo differs from the one before
        # it; this marks a different next choice, not necessarily a new path.
        choice = dat_sbj_pzi["choice"]
        idxx = np.array(choice[lastUndo_idx - 1]) != np.array(choice[lastUndo_idx + 1])
        if np.any(idxx):
            RT_diff_around_for_puzzle.extend(np.array(path_af_undo[idxx]) - np.array(path_bf_undo[idxx]))
    RT_diff_around_for_puzzle = np.array(RT_diff_around_for_puzzle)
    if RT_around_for_puzzle.shape[0] > 0:
        RT_around_for_puzzle = np.mean(RT_around_for_puzzle, axis=0)
    else:
        # no undo at all: same NaN pair as the mean of an empty array, but
        # without the "Mean of empty slice" warning
        RT_around_for_puzzle = np.full(2, np.nan)
    RT_diff_around.append([np.sum(RT_diff_around_for_puzzle < 0), np.sum(RT_diff_around_for_puzzle > 0)])
    RT_around.append(RT_around_for_puzzle)
RT_diff_around = np.array(RT_diff_around)
RT_around = np.array(RT_around)
/Users/dongjaekim/opt/anaconda3/envs/base37/lib/python3.7/site-packages/numpy/core/fromnumeric.py:3441: RuntimeWarning: Mean of empty slice. out=out, **kwargs) /Users/dongjaekim/opt/anaconda3/envs/base37/lib/python3.7/site-packages/numpy/core/_methods.py:182: RuntimeWarning: invalid value encountered in true_divide ret, rcount, out=ret, casting='unsafe', subok=False)
# Convert the [decreased, increased] counts to per-subject proportions.
# No subject is removed here: rows whose total is 0 are divided 0/0 and stay
# in the array as NaN.
RT_diff_around_p = RT_diff_around/ np.sum(RT_diff_around,axis = 1)[:,None]
# Warning emitted by the cell above:
# /Users/dongjaekim/opt/anaconda3/envs/base37/lib/python3.7/site-packages/ipykernel_launcher.py:2: RuntimeWarning: invalid value encountered in true_divide
# NaN-ignoring column means: RT before/after undo, then the proportions of
# decreased/increased RT.
print(np.nanmean(RT_around,axis=0))
print(np.nanmean(RT_diff_around_p,axis=0))
# Output: [1741.89441925 2559.44066795] [0.29879522 0.70120478]
# Paired Wilcoxon test: proportion of decreased vs increased RT after an undo.
# RT_diff_around_p holds NaN rows for subjects without counted events. Those
# rows are dropped first so that the result does not depend on how the
# installed SciPy version treats NaN input.
valid_rows = ~np.isnan(RT_diff_around_p).any(axis=1)
stat12, p12 = wilcoxon(RT_diff_around_p[valid_rows, 0], RT_diff_around_p[valid_rows, 1])
print(stat12)
print(p12)
# Output recorded in the original run (stat12, p12): 300.0 1.222315890309548e-13
%matplotlib notebook
# Bar plot of the mean proportions of decreased/increased RT after an undo.
mpl.rcParams['font.family'] = 'Arial'
mpl.rcParams['font.size'] = 11
plt.figure(figsize=(6,4.5))
# The NaN-aware mean/std skip subjects with NaN rows, so the standard error
# must divide by the number of non-NaN values per column, not by all rows.
n_valid = np.sum(~np.isnan(RT_diff_around_p), axis=0)
bb = plt.bar(range(2), np.nanmean(RT_diff_around_p, axis=0),
             color=[.7,.7,.7], edgecolor='k',
             yerr=np.nanstd(RT_diff_around_p, axis=0) / np.sqrt(n_valid))
plt.ylabel('Reaction time')
plt.xticks([0,1], ['Decreased','Increased'])
# Significance bracket 0.1 above the taller bar, labelled with p12.
# NOTE: "{:f}" prints six decimals, so a very small p-value shows as 0.000000.
x1, x2 = 0, 1
y, h, col = np.max([bb[0].get_height(),bb[1].get_height()]) + 0.1, 0.05, 'k'
plt.plot([x1, x1, x2, x2], [y, y+h, y+h, y], lw=1.5, c=col)
plt.text((x1+x2)*.5, y+h, r"$p = {:f}$".format(p12), ha='center', va='bottom', color=col, fontsize = 8)
# Cell result: Text(0.5, 0.8512047834180481, '$p = 0.000000$')
# Rescale by 1/1000 (the plot of these values is labelled in seconds).
RT_around = RT_around/1000
# Paired Wilcoxon test: RT before vs after an undo. Subjects who never undo
# have NaN rows; they are dropped first so that the result does not depend on
# how the installed SciPy version treats NaN input.
valid_rows = ~np.isnan(RT_around).any(axis=1)
stat12, p12 = wilcoxon(RT_around[valid_rows, 0], RT_around[valid_rows, 1])
print(stat12)
print(p12)
# Output recorded in the original run (stat12, p12): 589.0 2.802404040905235e-11
%matplotlib notebook
# Bar plot of the mean reaction time before and after an undo.
mpl.rcParams['font.family'] = 'Arial'
mpl.rcParams['font.size'] = 11
plt.figure(figsize=(6,4.5))
# The NaN-aware mean/std skip subjects with NaN rows, so the standard error
# must divide by the number of non-NaN values per column, not by all rows.
n_valid = np.sum(~np.isnan(RT_around), axis=0)
bb = plt.bar(range(2), np.nanmean(RT_around, axis=0),
             color=[.7,.7,.7], edgecolor='k',
             yerr=np.nanstd(RT_around, axis=0) / np.sqrt(n_valid))
plt.ylabel('Reaction time (s)')
plt.xticks([0,1], ['before','after'])
# Significance bracket 0.3 above the taller bar, labelled with p12.
# NOTE: "{:f}" prints six decimals, so a very small p-value shows as 0.000000.
x1, x2 = 0, 1
y, h, col = np.max([bb[0].get_height(),bb[1].get_height()]) + 0.3, 0.03, 'k'
plt.plot([x1, x1, x2, x2], [y, y+h, y+h, y], lw=1.5, c=col)
plt.text((x1+x2)*.5, y+h, r"$p = {:f}$".format(p12), ha='center', va='bottom', color=col, fontsize = 8)
# Cell result: Text(0.5, 2.8894406679458173, '$p = 0.000000$')
import copy
def get_tortuosity(pathList, pzi = 15, map_path = './util/basicMap.json'):
    """Return the travelled and the straight-line distance of each path.

    Parameters
    ----------
    pathList : sequence of sequences
        Each path is a sequence of city indices into the ``'xy'`` coordinate
        list of the selected map. The input is left unmodified.
    pzi : int
        Position of the puzzle map inside the JSON file.
    map_path : str
        JSON file holding the maps; defaults to the location that was
        previously hard-coded.

    Returns
    -------
    numpy.ndarray
        One row ``[d_zigzag, d_straight]`` per path, in input order:
        ``d_zigzag`` is the summed distance between consecutive cities and
        ``d_straight`` the distance between the first and the last city.
        Paths with fewer than two cities give ``[nan, nan]``.
    """
    # json is not part of the file's import block, so it is imported locally.
    import json

    def cal_dist(xy1, xy2):
        # Euclidean distance between two coordinate pairs
        return np.sqrt(np.sum((np.array(xy1)-np.array(xy2))**2))

    with open(map_path, 'rb') as f:
        city_xy = json.load(f)[pzi]['xy']

    tortuosity = []
    for path in pathList:
        if len(path) > 1:
            d_straight = cal_dist(city_xy[path[0]], city_xy[path[-1]])
            # walk the path as consecutive (from, to) pairs; nothing is popped,
            # so no defensive copy of the input is needed
            d_zigzag = sum(cal_dist(city_xy[ct_init], city_xy[ct_tgt])
                           for ct_init, ct_tgt in zip(path[:-1], path[1:]))
            tortuosity.append([d_zigzag, d_straight])
        else:
            tortuosity.append([np.nan, np.nan])
    return np.array(tortuosity)
import difflib
def get_overlap(s1, s2):
    """Return the longest contiguous block of ``s1`` that also occurs in ``s2``.

    The block is located with ``difflib.SequenceMatcher.find_longest_match``
    over the full length of both sequences and returned as a slice of ``s1``
    (empty when nothing matches).
    """
    matcher = difflib.SequenceMatcher(None, s1, s2)
    match = matcher.find_longest_match(0, len(s1), 0, len(s2))
    start = match.a
    stop = start + match.size
    return s1[start:stop]
def get_overlaplist(l1, l2, minlen=1):
    """Return the common contiguous runs of *l1* and *l2*, de-duplicated.

    For every pair of positions (one in *l1*, one in *l2*) holding equal items,
    the run of consecutive equal items starting at that pair is collected.
    The collected runs are then reduced by :func:`trimmer`.

    Parameters
    ----------
    l1, l2 : sequences of comparable items
    minlen : int
        Runs shorter than this are discarded.

    Returns
    -------
    list of lists
        Surviving runs, in the order produced by :func:`trimmer`.
    """
    matched_seq = []
    for i1, item in enumerate(l1):
        starts_in_l2 = [i2 for i2, other in enumerate(l2) if other == item]
        for m in starts_in_l2:
            run = []
            # walk both sequences in lockstep until they diverge or one ends
            for a, b in zip(l1[i1:], l2[m:]):
                if a != b:
                    break
                run.append(a)
            matched_seq.append(run)
    return trimmer(matched_seq, minlen)


def trimmer(matched_seq, minlen=1):
    """Drop sequences already covered by another one, then apply *minlen*.

    A sequence counts as covered when every one of its items occurs in a
    sequence that was kept earlier (membership only; order and contiguity are
    not considered). Two passes are made: first from longest to shortest,
    then over the survivors from shortest to longest. Finally sequences
    shorter than *minlen* are removed.

    Returns the surviving sequences in the order of the second pass.
    """
    def _drop_covered(seqs, order):
        kept = []
        for i in order:
            candidate = seqs[i]
            covered = any(all(x in k for x in candidate) for k in kept)
            # the first candidate is always kept, even if it is empty
            if not kept or not covered:
                kept.append(candidate)
        return kept

    longest_first = np.argsort([len(seq) for seq in matched_seq])[::-1]
    survivors = _drop_covered(matched_seq, longest_first)
    shortest_first = np.argsort([len(seq) for seq in survivors])
    survivors = _drop_covered(survivors, shortest_first)
    return [seq for seq in survivors if len(seq) >= minlen]
# ---------------------------------------------------------------------------
# Zigzag-ness and path-overlap tallies around undo episodes.
# For each subject and puzzle of sc_data_choice_level, the path stored on the
# row before each firstUndo row ("before") is compared with the path stored on
# the row before the next firstUndo row, or on the last row of the puzzle
# ("after"). Per-subject results are appended to the lists created below.
# NOTE(review): indentation was lost in this export, so the nesting of the
# statements below cannot be read from this text - confirm against the notebook.
# ---------------------------------------------------------------------------
undo_zigzag = []
undo_zigzag_diff =[]
undo_zigzag_diff_inlen =[]
undo_zigzag_normalized = []
overlap_seq_inorder = []
overlap_seq_inorder_inlen=[]
length_seq = []
undo_length_bfaf = []
undo_length_af = []
undo_length_bf = []
# NOTE(review): the subject ids 0..99 are hard-coded here.
for sub in range(100):
# rows of this subject, ordered by puzzle and then by the 'index' column
dat_sbj = sc_data_choice_level[sc_data_choice_level['subjects']==sub].sort_values(["puzzleID","index"])
undo_zigzag_puzzle = []
undo_zigzag_normalized_puzzle = []
undo_zigzag_diff_for_puzzle = []
undo_zigzag_diff_inlen_for_puzzle = []
# NOTE(review): undo_zigzag_normalized_puzzle is initialised a second time here
undo_zigzag_normalized_puzzle = []
overlap_seq_inorder_for_puzzle = []
overlap_seq_inorder_inlen_for_puzzle=[]
undo_length_bf_puzzle=[]
undo_length_af_puzzle=[]
for pzi in np.unique(sc_data_choice_level['puzzleID']):
dat_sbj_pzi = dat_sbj[dat_sbj['puzzleID'] == pzi].reset_index()
# positional row labels (after reset_index) of the rows flagged firstUndo / lastUndo
firstUndo_idx = dat_sbj_pzi[dat_sbj_pzi["firstUndo"]==1].index
lastUndo_idx = dat_sbj_pzi[dat_sbj_pzi["lastUndo"]==1].index
# label of the last row of this puzzle
# NOTE(review): pd.Int64Index was removed in pandas 2.0; pd.Index([...], dtype="int64")
# is the replacement - confirm the pandas version this notebook is pinned to.
submit_idx = pd.Int64Index([len(dat_sbj_pzi)-1])
if len(firstUndo_idx)>0:
if len(firstUndo_idx)==1: #
# single undo episode: before = row preceding the first undo, after = last row,
# im = row of the last undo
seq_bf = dat_sbj_pzi["path"][firstUndo_idx-1]
seq_af = dat_sbj_pzi["path"][submit_idx]
seq_im = dat_sbj_pzi["path"][lastUndo_idx]
else:
# several undo episodes: the "after" row of episode k is the row preceding
# the first undo of episode k+1; the last episode uses the last row
seq_bf = dat_sbj_pzi["path"][firstUndo_idx-1]
t_idx = (firstUndo_idx[1:]-1).to_list()
t_idx.append(submit_idx.item())
seq_af = dat_sbj_pzi["path"][t_idx]
seq_im = dat_sbj_pzi["path"][lastUndo_idx]
# 'path' cells are parsed here as strings of the form "[a, b, c]" into lists of ints;
# the pathStr_* variables are rebuilt as space-joined strings
seq_im = seq_im.reset_index()
pathStr_im = [seq_im.loc[i].path.strip('[').strip(']') for i in range(len(seq_im))]
pathList_im = [[int(i) for i in pathStr_im[j].split(', ')] for j in range(len(pathStr_im))]
pathStr_im = [" ".join([str(a) for a in pathList_im[j]]) for j in range(len(seq_im))]
seq_bf = seq_bf.reset_index()
pathStr_bf = [seq_bf.loc[i].path.strip('[').strip(']') for i in range(len(seq_bf))]
pathList_bf = [[int(i) for i in pathStr_bf[j].split(', ')] for j in range(len(pathStr_bf))]
pathStr_bf = [" ".join([str(a) for a in pathList_bf[j]]) for j in range(len(seq_bf))]
seq_af = seq_af.reset_index()
pathStr_af = [seq_af.loc[i].path.strip('[').strip(']') for i in range(len(seq_af))]
pathList_af = [[int(i) for i in pathStr_af[j].split(', ')] for j in range(len(pathStr_af))]
pathStr_af = [" ".join([str(a) for a in pathList_af[j]]) for j in range(len(seq_af))]
# items of the before/after path that are not in the path stored at the last undo row
# NOTE(review): np.setdiff1d returns sorted unique values, so the visiting order of
# these items is not preserved - confirm this is intended, since get_tortuosity and
# get_overlaplist below both treat the lists as ordered.
pathList_bfim = [np.setdiff1d(np.array(pathList_bf[i]),np.array(pathList_im[i])).tolist() for i in range(len(pathList_bf))]
pathStr_bfim = [" ".join([str(a) for a in pathList_bfim[j]]) for j in range(len(pathList_im))]
pathList_afim = [np.setdiff1d(np.array(pathList_af[i]),np.array(pathList_im[i])).tolist() for i in range(len(pathList_af))]
pathStr_afim = [" ".join([str(a) for a in pathList_afim[j]]) for j in range(len(pathList_im))]
# zigzag-ness = column 0 / column 1 of get_tortuosity (travelled / straight-line distance)
tor_bf = get_tortuosity(pathList_bfim, pzi)
path_bf_undo = (tor_bf[:,0]/tor_bf[:,1])
tor_af = get_tortuosity(pathList_afim, pzi)
path_af_undo = (tor_af[:,0]/tor_af[:,1])
len_path_bf = np.array([len(path) for path in pathList_bfim])
len_path_af = np.array([len(path) for path in pathList_afim])
zigzag_diff = np.array(path_af_undo) - np.array(path_bf_undo)
overlap_seq_2 = []
zigzag_diff_2 = []
seq_inorder_2 = []
# for the i-th "after" list, collect the runs it shares with "before" lists 0..i
for i in range(len(seq_af)):
temp = []
for j in range(i+1):
# temp.append(get_overlaplist(pathList_afim[i], pathList_bfim[j]))
temptemp = get_overlaplist(pathList_afim[i], pathList_bfim[j])
if not len(temptemp)==0:
temp.extend(temptemp)
temp = trimmer(temp)
# an empty list stands for "no overlap" so that every i contributes an entry
if len(temp)==0:
temp.append([])
seq_inorder_2.append(temp)
# len_seq_inorder_2 = [len(s) for s in temp if len(s)!=0]
len_seq_inorder_2 =[]
tempzigd = []
for s in temp:
# print('*')
if len(s)==0:
len_seq_inorder_2.append(0)
else:
len_seq_inorder_2.append(len(s))
tempzigd.append(zigzag_diff[i])
overlap_seq_2.extend(len_seq_inorder_2)
zigzag_diff_2.extend(tempzigd)
# This condition does not mean a different path was chosen; it means that, for at
# least one undo, the 'choice' on the row after the last undo differs from the
# 'choice' on the row before it.
if np.any(np.array(dat_sbj_pzi["choice"][lastUndo_idx-1]) != np.array(dat_sbj_pzi["choice"][lastUndo_idx+1])):
idxx = np.array(dat_sbj_pzi["choice"][lastUndo_idx-1]) != np.array(dat_sbj_pzi["choice"][lastUndo_idx+1])
# sign of the change in zigzag-ness (raw, and divided by list length)
undo_zigzag_puzzle.extend(np.sign(np.array(path_af_undo[idxx]) - np.array(path_bf_undo[idxx])))
undo_zigzag_normalized_puzzle.extend(np.sign(np.array(path_af_undo[idxx])/len_path_af[idxx] - np.array(path_bf_undo[idxx])/len_path_bf[idxx]))
undo_length_bf_puzzle.extend(len_path_bf[idxx].tolist())
undo_length_af_puzzle.extend(len_path_af[idxx].tolist())
# counts of overlap lengths: [==0, !=0] and [==0, ==1, ==2, >2]
overlap_seq_inorder_for_puzzle.append([np.sum(np.array(overlap_seq_2)==0), np.sum(np.array(overlap_seq_2)!=0)])
overlap_seq_inorder_inlen_for_puzzle.append([np.sum(np.array(overlap_seq_2)==0), np.sum(np.array(overlap_seq_2)==1),
np.sum(np.array(overlap_seq_2)==2), np.sum(np.array(overlap_seq_2)>2),])
length_seq.extend(overlap_seq_2)
# zigzag differences summed within the same overlap-length bins
undo_zigzag_diff_for_puzzle.append([np.sum(np.array(zigzag_diff_2)[np.array(overlap_seq_2)==0]) , np.sum(np.array(zigzag_diff_2)[np.array(overlap_seq_2)!=0]) ] )
undo_zigzag_diff_inlen_for_puzzle.append([np.sum(np.array(zigzag_diff_2)[np.array(overlap_seq_2)==0]), np.sum(np.array(zigzag_diff_2)[np.array(overlap_seq_2)==1]),
np.sum(np.array(zigzag_diff_2)[np.array(overlap_seq_2)==2]), np.sum(np.array(zigzag_diff_2)[np.array(overlap_seq_2)>2]),])
# else:
# overlap_seq_inorder_for_puzzle.append([np.nan,np.nan])
# overlap_seq_inorder_inlen_for_puzzle.append([np.nan,np.nan,np.nan,np.nan])
# per-subject aggregation: NaN-ignoring sum over puzzles, zeros when nothing was collected
overlap_seq_inorder_for_puzzle = np.array(overlap_seq_inorder_for_puzzle)
temp = overlap_seq_inorder_for_puzzle.copy()
# overlap_seq_inorder.append(np.nansum(temp,axis=0))
# print('*'*10)
if len(temp) == 0:
overlap_seq_inorder.append(np.zeros((2)))
else:
overlap_seq_inorder.append(np.nansum(temp,axis=0))
overlap_seq_inorder_inlen_for_puzzle=np.array(overlap_seq_inorder_inlen_for_puzzle)
temp = overlap_seq_inorder_inlen_for_puzzle.copy()
if len(temp) == 0:
overlap_seq_inorder_inlen.append(np.zeros((4)))
else:
overlap_seq_inorder_inlen.append(np.nansum(temp,axis=0))
undo_zigzag_puzzle = np.array(undo_zigzag_puzzle)
undo_zigzag_normalized_puzzle = np.array(undo_zigzag_normalized_puzzle)
# undo_zigzag.append([np.sum(undo_zigzag_puzzle<0), np.sum(undo_zigzag_puzzle==0) ,np.sum(undo_zigzag_puzzle>0)])
# per subject: [number of decreases, number of increases]; zero and NaN signs are not counted
undo_zigzag.append([np.sum(undo_zigzag_puzzle<0) ,np.sum(undo_zigzag_puzzle>0)])
undo_zigzag_normalized.append([np.sum(undo_zigzag_normalized_puzzle<0) ,np.sum(undo_zigzag_normalized_puzzle>0)])
undo_length_af.extend(undo_length_af_puzzle)
undo_length_bf.extend(undo_length_bf_puzzle)
# undo_length_bfaf.append(np.array([undo_length_bf_puzzle, undo_length_af_puzzle]))
undo_zigzag_diff_for_puzzle=np.array(undo_zigzag_diff_for_puzzle)
temp = undo_zigzag_diff_for_puzzle.copy()
if len(temp) == 0:
undo_zigzag_diff.append(np.zeros((2)))
else:
undo_zigzag_diff.append(np.nansum(temp,axis=0))
undo_zigzag_diff_inlen_for_puzzle=np.array(undo_zigzag_diff_inlen_for_puzzle)
temp = undo_zigzag_diff_inlen_for_puzzle.copy()
if len(temp) == 0:
undo_zigzag_diff_inlen.append(np.zeros((4)))
else:
undo_zigzag_diff_inlen.append(np.nansum(temp,axis=0))
# convert the per-subject lists to arrays (one row per subject)
undo_zigzag = np.array(undo_zigzag)
undo_zigzag_normalized = np.array(undo_zigzag_normalized)
undo_zigzag_diff = np.array(undo_zigzag_diff)
undo_zigzag_diff_inlen = np.array(undo_zigzag_diff_inlen)
overlap_seq_inorder = np.array(overlap_seq_inorder)
overlap_seq_inorder_inlen = np.array(overlap_seq_inorder_inlen)
# row 0: "before" list lengths, row 1: "after" list lengths (pooled over subjects)
undo_length_bfaf = np.array([undo_length_bf, undo_length_af])
# "minlen2" variants: drop column 1 (overlap length == 1) from the 4-bin table,
# then collapse the remaining overlap columns into a 2-bin [none, overlap] table.
overlap_seq_inorder_inlen_minlen2 = np.delete(overlap_seq_inorder_inlen, 1, axis=1)
overlap_seq_inorder_minlen2 = np.column_stack((
    overlap_seq_inorder_inlen_minlen2[:, 0],
    overlap_seq_inorder_inlen_minlen2[:, 1:].sum(axis=1),
))

# Row-normalise every table; rows whose sum is 0 become NaN (with a RuntimeWarning).
overlap_seq_inorder_p = overlap_seq_inorder / overlap_seq_inorder.sum(axis=1, keepdims=True)
overlap_seq_inorder_inlen_p = overlap_seq_inorder_inlen / overlap_seq_inorder_inlen.sum(axis=1, keepdims=True)
overlap_seq_inorder_minlen2_p = overlap_seq_inorder_minlen2 / overlap_seq_inorder_minlen2.sum(axis=1, keepdims=True)
overlap_seq_inorder_inlen_minlen2_p = overlap_seq_inorder_inlen_minlen2 / overlap_seq_inorder_inlen_minlen2.sum(axis=1, keepdims=True)
/Users/dongjaekim/opt/anaconda3/envs/base37/lib/python3.7/site-packages/ipykernel_launcher.py:6: RuntimeWarning: invalid value encountered in true_divide /Users/dongjaekim/opt/anaconda3/envs/base37/lib/python3.7/site-packages/ipykernel_launcher.py:7: RuntimeWarning: invalid value encountered in true_divide import sys /Users/dongjaekim/opt/anaconda3/envs/base37/lib/python3.7/site-packages/ipykernel_launcher.py:10: RuntimeWarning: invalid value encountered in true_divide # Remove the CWD from sys.path while we load stuff. /Users/dongjaekim/opt/anaconda3/envs/base37/lib/python3.7/site-packages/ipykernel_launcher.py:11: RuntimeWarning: invalid value encountered in true_divide # This is added back by InteractiveShellApp.init_path()
# Same reduction for the zigzag-difference sums: drop column 1, collapse the
# remaining overlap columns into one, then divide each row by its own sum.
undo_zigzag_diff_inlen_minlen2 = np.delete(undo_zigzag_diff_inlen, 1, axis=1)
undo_zigzag_diff_minlen2 = np.column_stack((
    undo_zigzag_diff_inlen_minlen2[:, 0],
    undo_zigzag_diff_inlen_minlen2[:, 1:].sum(axis=1),
))
undo_zigzag_diff_minlen2_p = undo_zigzag_diff_minlen2 / undo_zigzag_diff_minlen2.sum(axis=1, keepdims=True)
undo_zigzag_diff_inlen_minlen2_p = undo_zigzag_diff_inlen_minlen2 / undo_zigzag_diff_inlen_minlen2.sum(axis=1, keepdims=True)
/Users/dongjaekim/opt/anaconda3/envs/base37/lib/python3.7/site-packages/ipykernel_launcher.py:6: RuntimeWarning: invalid value encountered in true_divide /Users/dongjaekim/opt/anaconda3/envs/base37/lib/python3.7/site-packages/ipykernel_launcher.py:7: RuntimeWarning: invalid value encountered in true_divide import sys
# Exclude subjects whose [decreased, increased] counts are both zero, then turn
# the remaining counts into per-subject proportions.
# A boolean row mask keeps the result two-dimensional even when a single row
# survives (indexing with np.where(...) followed by squeeze() would collapse it
# to 1-D and break the axis=1 sums below).
undo_zigzag = np.asarray(undo_zigzag)
undo_zigzag = undo_zigzag[np.sum(undo_zigzag, axis=1) != 0]
undo_zigzag_p = undo_zigzag / np.sum(undo_zigzag, axis=1)[:, None]
undo_zigzag_normalized = np.asarray(undo_zigzag_normalized)
undo_zigzag_normalized = undo_zigzag_normalized[np.sum(undo_zigzag_normalized, axis=1) != 0]
undo_zigzag_normalized_p = undo_zigzag_normalized / np.sum(undo_zigzag_normalized, axis=1)[:, None]
# Paired Wilcoxon signed-rank test: proportion decreased (col 0) vs increased (col 1).
stat12, p12 = wilcoxon(undo_zigzag_p[:, 0], undo_zigzag_p[:, 1])
print(stat12, p12, sep='\n')
125.0 2.4502913299164797e-11
# Same test on the length-normalised proportions (this overwrites stat12 / p12).
stat12, p12 = wilcoxon(undo_zigzag_normalized_p[:, 0], undo_zigzag_normalized_p[:, 1])
print(stat12, p12, sep='\n')
536.0 9.889191051818236e-05
%matplotlib inline
mpl.rcParams['font.family'] = 'Arial'
mpl.rcParams['font.size'] = 11
fig = plt.figure(figsize=(6,4.5))
# plt.figure(figsize=(5,3.75))
bb = plt.bar(range(2), np.mean(undo_zigzag_p,axis=0),
color=[.7,.7,.7], edgecolor = 'k',
yerr=np.std(undo_zigzag_p,axis = 0)/np.sqrt(undo_zigzag_p.shape[0]))
plt.ylabel('Zigzag-ness after undo')
plt.xticks([0,1], ['Decreased','Increased'])
# means they got into the path that has better mas, and less confusing?
#statistics
x1, x2 = 0,1
y, h, col = np.max([bb[0].get_height(),bb[1].get_height()]) + 0.1, 0.02, 'k'
plt.plot([x1, x1, x2, x2], [y, y+h, y+h, y], lw=1.5, c=col)
plt.text((x1+x2)*.5, y+h, r"$p = {:f}$".format(p12), ha='center', va='bottom', color=col, fontsize = 8)
fig.savefig(out_dir + 'proportion_zigzagness_increased_decreased.png', dpi=600, bbox_inches='tight')
%matplotlib inline
mpl.rcParams['font.family'] = 'Arial'
mpl.rcParams['font.size'] = 11
fig = plt.figure(figsize=(6,4.5))
# plt.figure(figsize=(5,3.75))
bb = plt.bar(range(2), np.mean(undo_zigzag_normalized_p,axis=0),
color=[.7,.7,.7], edgecolor = 'k',
yerr=np.std(undo_zigzag_normalized_p,axis = 0)/np.sqrt(undo_zigzag_normalized_p.shape[0]))
plt.ylabel('Zigzag-ness after undo (divided by length')
plt.xticks([0,1], ['Decreased','Increased'])
# means they got into the path that has better mas, and less confusing?
#statistics
x1, x2 = 0,1
y, h, col = np.max([bb[0].get_height(),bb[1].get_height()]) + 0.1, 0.02, 'k'
plt.plot([x1, x1, x2, x2], [y, y+h, y+h, y], lw=1.5, c=col)
plt.text((x1+x2)*.5, y+h, r"$p = {:f}$".format(p12), ha='center', va='bottom', color=col, fontsize = 8)
fig.savefig(out_dir + 'proportion_normalized_zigzagness_increased_decreased.png', dpi=600, bbox_inches='tight')
import difflib
def get_overlap(s1, s2):
    """Return the longest contiguous run common to *s1* and *s2*.

    The result is a slice of *s1* (so it has the same type as *s1*); it is
    empty when the two sequences share no item.
    """
    # autojunk=False: the default heuristic ignores items that occur often in
    # sequences of 200+ elements, which can hide the true longest match.
    matcher = difflib.SequenceMatcher(None, s1, s2, autojunk=False)
    match = matcher.find_longest_match(0, len(s1), 0, len(s2))
    return s1[match.a:match.a + match.size]
def get_overlaplist(l1, l2, minlen=1):
    """Return the common contiguous runs of *l1* and *l2*, de-duplicated.

    For every pair of positions (one in *l1*, one in *l2*) holding equal items,
    the run of consecutive equal items starting at that pair is collected.
    The collected runs are then reduced by :func:`trimmer`; runs shorter than
    *minlen* are discarded.
    """
    matched_seq = []
    for i1, item in enumerate(l1):
        starts_in_l2 = [i2 for i2, other in enumerate(l2) if other == item]
        for m in starts_in_l2:
            run = []
            # walk both sequences in lockstep until they diverge or one ends
            for a, b in zip(l1[i1:], l2[m:]):
                if a != b:
                    break
                run.append(a)
            matched_seq.append(run)
    return trimmer(matched_seq, minlen)


def trimmer(matched_seq, minlen=1):
    """Drop sequences already covered by another one, then apply *minlen*.

    A sequence counts as covered when every one of its items occurs in a
    sequence that was kept earlier (membership only; order and contiguity are
    not considered). Two passes are made: first from longest to shortest,
    then over the survivors from shortest to longest. Finally sequences
    shorter than *minlen* are removed.

    Returns the surviving sequences in the order of the second pass.
    """
    def _drop_covered(seqs, order):
        kept = []
        for i in order:
            candidate = seqs[i]
            covered = any(all(x in k for x in candidate) for k in kept)
            # the first candidate is always kept, even if it is empty
            if not kept or not covered:
                kept.append(candidate)
        return kept

    longest_first = np.argsort([len(seq) for seq in matched_seq])[::-1]
    survivors = _drop_covered(matched_seq, longest_first)
    shortest_first = np.argsort([len(seq) for seq in survivors])
    survivors = _drop_covered(survivors, shortest_first)
    return [seq for seq in survivors if len(seq) >= minlen]
# ---------------------------------------------------------------------------
# Change of currMas / currNos / leftover / RT around undo episodes, split by how
# much the "after" path overlaps earlier "before" paths.
# For each subject and puzzle of sc_data_choice_level the differences
# (after - before) are summed within overlap-length bins, then NaN-summed over
# puzzles; one row per subject is appended to the lists created below.
# NOTE(review): indentation was lost in this export, so the nesting of the
# statements below cannot be read from this text - confirm against the notebook.
# ---------------------------------------------------------------------------
overlap_seq_inorder = []
overlap_seq_inorder_inlen=[]
currmas = []
currmas_inlen=[]
endmas = []
endmas_inlen=[]
currnos = []
currnos_inlen=[]
leftover = []
leftover_inlen=[]
RT = []
RT_inlen = []
length_seq = []
# NOTE(review): the subject ids 0..99 are hard-coded here.
for sub in range(100):
dat_sbj = sc_data_choice_level[sc_data_choice_level['subjects']==sub].sort_values(["puzzleID","index"])
overlap_seq_inorder_for_puzzle = []
overlap_seq_inorder_inlen_for_puzzle=[]
currmas_for_puzzle = []
currmas_inlen_for_puzzle=[]
endmas_for_puzzle = []
endmas_inlen_for_puzzle=[]
currnos_for_puzzle = []
currnos_inlen_for_puzzle=[]
leftover_for_puzzle = []
leftover_inlen_for_puzzle=[]
RT_for_puzzle = []
RT_inlen_for_puzzle = []
for pzi in np.unique(sc_data_choice_level['puzzleID']):
dat_sbj_pzi = dat_sbj[dat_sbj['puzzleID'] == pzi].reset_index()
firstUndo_idx = dat_sbj_pzi[dat_sbj_pzi["firstUndo"]==1].index
# NOTE(review): path_bf_undo / path_af_undo do not appear to be used again in this cell
path_bf_undo = dat_sbj_pzi["currMas"][firstUndo_idx-1] # the mas of the state before undo
lastUndo_idx = dat_sbj_pzi[dat_sbj_pzi["lastUndo"]==1].index
path_af_undo = dat_sbj_pzi["currMas"][lastUndo_idx+1] # the mas of the state after undo
# label of the last row of this puzzle
# NOTE(review): pd.Int64Index was removed in pandas 2.0; pd.Index([...], dtype="int64")
# is the replacement - confirm the pandas version this notebook is pinned to.
submit_idx = pd.Int64Index([len(dat_sbj_pzi)-1])
if len(firstUndo_idx)>0:
if len(firstUndo_idx)==1: #
# single undo episode.
# curr* and RT: row before vs row after the last undo row.
# endmas and leftover: row before the first undo vs the last row of the puzzle.
seq_bf = dat_sbj_pzi["path"][firstUndo_idx-1]
seq_af = dat_sbj_pzi["path"][submit_idx]
seq_im = dat_sbj_pzi["path"][lastUndo_idx]
currmas_bf = dat_sbj_pzi["currMas"][lastUndo_idx-1]
currmas_af = dat_sbj_pzi["currMas"][lastUndo_idx+1]
endmas_bf = dat_sbj_pzi["currMas"][firstUndo_idx-1]
endmas_af = dat_sbj_pzi["currMas"][submit_idx]
currnos_bf = dat_sbj_pzi["currNos"][lastUndo_idx-1]
currnos_af = dat_sbj_pzi["currNos"][lastUndo_idx+1]
leftover_bf = dat_sbj_pzi["leftover"][firstUndo_idx-1]
leftover_af = dat_sbj_pzi["leftover"][submit_idx]
RT_bf = dat_sbj_pzi["RT"][lastUndo_idx-1]
RT_af = dat_sbj_pzi["RT"][lastUndo_idx+1]
else:
# several undo episodes: the "after" row of episode k is the row preceding the
# first undo of episode k+1; the last episode uses the last row of the puzzle
seq_bf = dat_sbj_pzi["path"][firstUndo_idx-1]
t_idx = (firstUndo_idx[1:]-1).to_list()
t_idx.append(submit_idx.item())
seq_af = dat_sbj_pzi["path"][t_idx]
seq_im = dat_sbj_pzi["path"][lastUndo_idx]
currmas_bf = dat_sbj_pzi["currMas"][lastUndo_idx-1]
currmas_af = dat_sbj_pzi["currMas"][lastUndo_idx+1]
endmas_bf = dat_sbj_pzi["currMas"][firstUndo_idx-1]
endmas_af = dat_sbj_pzi["currMas"][t_idx]
currnos_bf = dat_sbj_pzi["currNos"][lastUndo_idx-1]
currnos_af = dat_sbj_pzi["currNos"][lastUndo_idx+1]
leftover_bf = dat_sbj_pzi["leftover"][firstUndo_idx-1]
leftover_af = dat_sbj_pzi["leftover"][t_idx]
RT_bf = dat_sbj_pzi["RT"][lastUndo_idx-1]
RT_af = dat_sbj_pzi["RT"][lastUndo_idx+1]
# 'path' cells are parsed here as strings of the form "[a, b, c]" into lists of ints
seq_im = seq_im.reset_index()
pathStr_im = [seq_im.loc[i].path.strip('[').strip(']') for i in range(len(seq_im))]
pathList_im = [[int(i) for i in pathStr_im[j].split(', ')] for j in range(len(pathStr_im))]
pathStr_im = [" ".join([str(a) for a in pathList_im[j]]) for j in range(len(seq_im))]
seq_bf = seq_bf.reset_index()
pathStr_bf = [seq_bf.loc[i].path.strip('[').strip(']') for i in range(len(seq_bf))]
pathList_bf = [[int(i) for i in pathStr_bf[j].split(', ')] for j in range(len(pathStr_bf))]
pathStr_bf = [" ".join([str(a) for a in pathList_bf[j]]) for j in range(len(seq_bf))]
seq_af = seq_af.reset_index()
pathStr_af = [seq_af.loc[i].path.strip('[').strip(']') for i in range(len(seq_af))]
pathList_af = [[int(i) for i in pathStr_af[j].split(', ')] for j in range(len(pathStr_af))]
pathStr_af = [" ".join([str(a) for a in pathList_af[j]]) for j in range(len(seq_af))]
# items of the before/after path that are not in the path stored at the last undo row
# NOTE(review): np.setdiff1d returns sorted unique values, so the visiting order of
# these items is not preserved - confirm this is intended, since get_overlaplist
# below compares the lists position by position.
pathList_bfim = [np.setdiff1d(np.array(pathList_bf[i]),np.array(pathList_im[i])).tolist() for i in range(len(pathList_bf))]
pathStr_bfim = [" ".join([str(a) for a in pathList_bfim[j]]) for j in range(len(pathList_im))]
pathList_afim = [np.setdiff1d(np.array(pathList_af[i]),np.array(pathList_im[i])).tolist() for i in range(len(pathList_af))]
pathStr_afim = [" ".join([str(a) for a in pathList_afim[j]]) for j in range(len(pathList_im))]
# after - before, one value per undo episode
currmas_dif = np.array(currmas_af) - np.array(currmas_bf)
endmas_dif = np.array(endmas_af) - np.array(endmas_bf)
currnos_dif = np.array(currnos_af) - np.array(currnos_bf)
leftover_dif = np.array(leftover_af) - np.array(leftover_bf)
rt_dif = np.array(RT_af) - np.array(RT_bf)
overlap_seq_2 = []
seq_inorder_2 = []
currmas_dif_2 = []
endmas_dif_2 = []
currnos_dif_2 = []
leftover_dif_2 = []
RT_dif_2 = []
# for the i-th "after" list, collect the runs it shares with "before" lists 0..i
for i in range(len(seq_af)):
temp = []
for j in range(i+1):
# temp.append(get_overlaplist(pathList_afim[i], pathList_bfim[j]))
temptemp = get_overlaplist(pathList_afim[i], pathList_bfim[j])
if not len(temptemp)==0:
temp.extend(temptemp)
temp = trimmer(temp)
# an empty list stands for "no overlap" so that every i contributes an entry
if len(temp)==0:
temp.append([])
seq_inorder_2.append(temp)
# len_seq_inorder_2 = [len(s) for s in temp if len(s)!=0]
len_seq_inorder_2 =[]
tempcurd = []
tempendd = []
tempnosd = []
templeft = []
temprt = []
# the i-th difference is appended to the temp* lists alongside each run length, so
# the *_dif_2 lists can be masked by overlap_seq_2 below
for s in temp:
# print('*')
if len(s)==0:
len_seq_inorder_2.append(0)
else:
len_seq_inorder_2.append(len(s))
tempcurd.append(currmas_dif[i])
tempendd.append(endmas_dif[i])
tempnosd.append(currnos_dif[i])
templeft.append(leftover_dif[i])
temprt.append(rt_dif[i])
overlap_seq_2.extend(len_seq_inorder_2)
currmas_dif_2.extend(tempcurd)
endmas_dif_2.extend(tempendd)
currnos_dif_2.extend(tempnosd)
leftover_dif_2.extend(templeft)
RT_dif_2.extend(temprt)
# print(overlap_seq_2)
# print(currmas_dif_2)
# counts of overlap lengths: [==0, !=0] and [==0, ==1, ==2, >2]
overlap_seq_inorder_for_puzzle.append([np.sum(np.array(overlap_seq_2)==0), np.sum(np.array(overlap_seq_2)!=0)])
overlap_seq_inorder_inlen_for_puzzle.append([np.sum(np.array(overlap_seq_2)==0), np.sum(np.array(overlap_seq_2)==1),
np.sum(np.array(overlap_seq_2)==2), np.sum(np.array(overlap_seq_2)>2),])
# each measure: differences summed within the same overlap-length bins
currmas_for_puzzle.append([np.sum(np.array(currmas_dif_2)[np.array(overlap_seq_2)==0]) , np.sum(np.array(currmas_dif_2)[np.array(overlap_seq_2)!=0]) ] )
currmas_inlen_for_puzzle.append([np.sum(np.array(currmas_dif_2)[np.array(overlap_seq_2)==0]), np.sum(np.array(currmas_dif_2)[np.array(overlap_seq_2)==1]),
np.sum(np.array(currmas_dif_2)[np.array(overlap_seq_2)==2]), np.sum(np.array(currmas_dif_2)[np.array(overlap_seq_2)>2]),])
endmas_for_puzzle.append([np.sum(np.array(endmas_dif_2)[np.array(overlap_seq_2)==0]) , np.sum(np.array(endmas_dif_2)[np.array(overlap_seq_2)!=0]) ] )
endmas_inlen_for_puzzle.append([np.sum(np.array(endmas_dif_2)[np.array(overlap_seq_2)==0]), np.sum(np.array(endmas_dif_2)[np.array(overlap_seq_2)==1]),
np.sum(np.array(endmas_dif_2)[np.array(overlap_seq_2)==2]), np.sum(np.array(endmas_dif_2)[np.array(overlap_seq_2)>2]),])
# print(overlap_seq_2)
length_seq.extend(overlap_seq_2)
currnos_for_puzzle.append([np.sum(np.array(currnos_dif_2)[np.array(overlap_seq_2)==0]) , np.sum(np.array(currnos_dif_2)[np.array(overlap_seq_2)!=0]) ] )
currnos_inlen_for_puzzle.append([np.sum(np.array(currnos_dif_2)[np.array(overlap_seq_2)==0]), np.sum(np.array(currnos_dif_2)[np.array(overlap_seq_2)==1]),
np.sum(np.array(currnos_dif_2)[np.array(overlap_seq_2)==2]), np.sum(np.array(currnos_dif_2)[np.array(overlap_seq_2)>2]),])
leftover_for_puzzle.append([np.sum(np.array(leftover_dif_2)[np.array(overlap_seq_2)==0]) , np.sum(np.array(leftover_dif_2)[np.array(overlap_seq_2)!=0]) ] )
leftover_inlen_for_puzzle.append([np.sum(np.array(leftover_dif_2)[np.array(overlap_seq_2)==0]), np.sum(np.array(leftover_dif_2)[np.array(overlap_seq_2)==1]),
np.sum(np.array(leftover_dif_2)[np.array(overlap_seq_2)==2]), np.sum(np.array(leftover_dif_2)[np.array(overlap_seq_2)>2]),])
RT_for_puzzle.append([np.sum(np.array(RT_dif_2)[np.array(overlap_seq_2)==0]) , np.sum(np.array(RT_dif_2)[np.array(overlap_seq_2)!=0]) ] )
RT_inlen_for_puzzle.append([np.sum(np.array(RT_dif_2)[np.array(overlap_seq_2)==0]), np.sum(np.array(RT_dif_2)[np.array(overlap_seq_2)==1]),
np.sum(np.array(RT_dif_2)[np.array(overlap_seq_2)==2]), np.sum(np.array(RT_dif_2)[np.array(overlap_seq_2)>2]),])
else:
# puzzle without any undo: NaN placeholders, ignored by the nansum aggregation below
overlap_seq_inorder_for_puzzle.append([np.nan,np.nan])
overlap_seq_inorder_inlen_for_puzzle.append([np.nan,np.nan,np.nan,np.nan])
currmas_for_puzzle.append([np.nan,np.nan])
currmas_inlen_for_puzzle.append([np.nan,np.nan,np.nan,np.nan])
endmas_for_puzzle.append([np.nan,np.nan])
endmas_inlen_for_puzzle.append([np.nan,np.nan,np.nan,np.nan])
currnos_for_puzzle.append([np.nan,np.nan])
currnos_inlen_for_puzzle.append([np.nan,np.nan,np.nan,np.nan])
leftover_for_puzzle.append([np.nan,np.nan])
leftover_inlen_for_puzzle.append([np.nan,np.nan,np.nan,np.nan])
RT_for_puzzle.append([np.nan,np.nan])
RT_inlen_for_puzzle.append([np.nan,np.nan,np.nan,np.nan])
# per-subject aggregation: NaN-ignoring sum over puzzles for every table
overlap_seq_inorder_for_puzzle = np.array(overlap_seq_inorder_for_puzzle)
temp = overlap_seq_inorder_for_puzzle.copy()
overlap_seq_inorder.append(np.nansum(temp,axis=0))
overlap_seq_inorder_inlen_for_puzzle=np.array(overlap_seq_inorder_inlen_for_puzzle)
temp = overlap_seq_inorder_inlen_for_puzzle.copy()
overlap_seq_inorder_inlen.append(np.nansum(temp,axis=0))
currmas_for_puzzle = np.array(currmas_for_puzzle)
temp = currmas_for_puzzle.copy()
currmas.append(np.nansum(temp,axis=0))
currmas_inlen_for_puzzle=np.array(currmas_inlen_for_puzzle)
temp = currmas_inlen_for_puzzle.copy()
currmas_inlen.append(np.nansum(temp,axis=0))
endmas_for_puzzle = np.array(endmas_for_puzzle)
temp = endmas_for_puzzle.copy()
endmas.append(np.nansum(temp,axis=0))
endmas_inlen_for_puzzle=np.array(endmas_inlen_for_puzzle)
temp = endmas_inlen_for_puzzle.copy()
endmas_inlen.append(np.nansum(temp,axis=0))
currnos_for_puzzle = np.array(currnos_for_puzzle)
temp = currnos_for_puzzle.copy()
currnos.append(np.nansum(temp,axis=0))
currnos_inlen_for_puzzle=np.array(currnos_inlen_for_puzzle)
temp = currnos_inlen_for_puzzle.copy()
currnos_inlen.append(np.nansum(temp,axis=0))
leftover_for_puzzle = np.array(leftover_for_puzzle)
temp = leftover_for_puzzle.copy()
leftover.append(np.nansum(temp,axis=0))
leftover_inlen_for_puzzle=np.array(leftover_inlen_for_puzzle)
temp = leftover_inlen_for_puzzle.copy()
leftover_inlen.append(np.nansum(temp,axis=0))
RT_for_puzzle = np.array(RT_for_puzzle)
temp = RT_for_puzzle.copy()
RT.append(np.nansum(temp,axis=0))
RT_inlen_for_puzzle=np.array(RT_inlen_for_puzzle)
temp = RT_inlen_for_puzzle.copy()
RT_inlen.append(np.nansum(temp,axis=0))
# convert the per-subject lists to arrays (one row per subject)
overlap_seq_inorder = np.array(overlap_seq_inorder)
overlap_seq_inorder_inlen = np.array(overlap_seq_inorder_inlen)
currmas = np.array(currmas)
currmas_inlen = np.array(currmas_inlen)
endmas = np.array(endmas)
endmas_inlen = np.array(endmas_inlen)
leftover = np.array(leftover)
leftover_inlen = np.array(leftover_inlen)
currnos = np.array(currnos)
currnos_inlen = np.array(currnos_inlen)
RT = np.array(RT)
RT_inlen = np.array(RT_inlen)
# "minlen2" variants: drop column 1 (overlap length == 1) from the 4-bin table,
# then collapse the remaining overlap columns into a 2-bin [none, overlap] table.
overlap_seq_inorder_inlen_minlen2 = np.delete(overlap_seq_inorder_inlen, 1, axis=1)
overlap_seq_inorder_minlen2 = np.column_stack((
    overlap_seq_inorder_inlen_minlen2[:, 0],
    overlap_seq_inorder_inlen_minlen2[:, 1:].sum(axis=1),
))

# Row-normalise every table; rows whose sum is 0 become NaN (with a RuntimeWarning).
overlap_seq_inorder_p = overlap_seq_inorder / overlap_seq_inorder.sum(axis=1, keepdims=True)
overlap_seq_inorder_inlen_p = overlap_seq_inorder_inlen / overlap_seq_inorder_inlen.sum(axis=1, keepdims=True)
overlap_seq_inorder_minlen2_p = overlap_seq_inorder_minlen2 / overlap_seq_inorder_minlen2.sum(axis=1, keepdims=True)
overlap_seq_inorder_inlen_minlen2_p = overlap_seq_inorder_inlen_minlen2 / overlap_seq_inorder_inlen_minlen2.sum(axis=1, keepdims=True)
/Users/dongjaekim/opt/anaconda3/envs/base37/lib/python3.7/site-packages/ipykernel_launcher.py:6: RuntimeWarning: invalid value encountered in true_divide /Users/dongjaekim/opt/anaconda3/envs/base37/lib/python3.7/site-packages/ipykernel_launcher.py:7: RuntimeWarning: invalid value encountered in true_divide import sys /Users/dongjaekim/opt/anaconda3/envs/base37/lib/python3.7/site-packages/ipykernel_launcher.py:10: RuntimeWarning: invalid value encountered in true_divide # Remove the CWD from sys.path while we load stuff. /Users/dongjaekim/opt/anaconda3/envs/base37/lib/python3.7/site-packages/ipykernel_launcher.py:11: RuntimeWarning: invalid value encountered in true_divide # This is added back by InteractiveShellApp.init_path()
# Divide each subject's summed differences by that subject's total number of
# overlap entries (row sum of the matching count table). Subjects with a zero
# count give NaN rows and a RuntimeWarning.
currmas_p = currmas / overlap_seq_inorder.sum(axis=1, keepdims=True)
currmas_inlen_p = currmas_inlen / overlap_seq_inorder_inlen.sum(axis=1, keepdims=True)
endmas_p = endmas / overlap_seq_inorder.sum(axis=1, keepdims=True)
endmas_inlen_p = endmas_inlen / overlap_seq_inorder_inlen.sum(axis=1, keepdims=True)
currnos_p = currnos / overlap_seq_inorder.sum(axis=1, keepdims=True)
currnos_inlen_p = currnos_inlen / overlap_seq_inorder_inlen.sum(axis=1, keepdims=True)
leftover_p = leftover / overlap_seq_inorder.sum(axis=1, keepdims=True)
leftover_inlen_p = leftover_inlen / overlap_seq_inorder_inlen.sum(axis=1, keepdims=True)
RT_p = RT / overlap_seq_inorder.sum(axis=1, keepdims=True)
RT_inlen_p = RT_inlen / overlap_seq_inorder_inlen.sum(axis=1, keepdims=True)
/Users/dongjaekim/opt/anaconda3/envs/base37/lib/python3.7/site-packages/ipykernel_launcher.py:1: RuntimeWarning: invalid value encountered in true_divide """Entry point for launching an IPython kernel. /Users/dongjaekim/opt/anaconda3/envs/base37/lib/python3.7/site-packages/ipykernel_launcher.py:2: RuntimeWarning: invalid value encountered in true_divide /Users/dongjaekim/opt/anaconda3/envs/base37/lib/python3.7/site-packages/ipykernel_launcher.py:4: RuntimeWarning: invalid value encountered in true_divide after removing the cwd from sys.path. /Users/dongjaekim/opt/anaconda3/envs/base37/lib/python3.7/site-packages/ipykernel_launcher.py:5: RuntimeWarning: invalid value encountered in true_divide """ /Users/dongjaekim/opt/anaconda3/envs/base37/lib/python3.7/site-packages/ipykernel_launcher.py:7: RuntimeWarning: invalid value encountered in true_divide import sys /Users/dongjaekim/opt/anaconda3/envs/base37/lib/python3.7/site-packages/ipykernel_launcher.py:8: RuntimeWarning: invalid value encountered in true_divide /Users/dongjaekim/opt/anaconda3/envs/base37/lib/python3.7/site-packages/ipykernel_launcher.py:10: RuntimeWarning: invalid value encountered in true_divide # Remove the CWD from sys.path while we load stuff. /Users/dongjaekim/opt/anaconda3/envs/base37/lib/python3.7/site-packages/ipykernel_launcher.py:11: RuntimeWarning: invalid value encountered in true_divide # This is added back by InteractiveShellApp.init_path() /Users/dongjaekim/opt/anaconda3/envs/base37/lib/python3.7/site-packages/ipykernel_launcher.py:13: RuntimeWarning: invalid value encountered in true_divide del sys.path[0] /Users/dongjaekim/opt/anaconda3/envs/base37/lib/python3.7/site-packages/ipykernel_launcher.py:14: RuntimeWarning: invalid value encountered in true_divide
# "minlen2" variants of the currMas / end-Mas tables: drop column 1
# (overlap length == 1), collapse the remaining overlap columns into one, then
# divide by the row sums of the matching minlen2 count tables.
currmas_inlen_minlen2 = np.delete(currmas_inlen, 1, axis=1)
endmas_inlen_minlen2 = np.delete(endmas_inlen, 1, axis=1)

currmas_minlen2 = np.column_stack((
    currmas_inlen_minlen2[:, 0],
    currmas_inlen_minlen2[:, 1:].sum(axis=1),
))
endmas_minlen2 = np.column_stack((
    endmas_inlen_minlen2[:, 0],
    endmas_inlen_minlen2[:, 1:].sum(axis=1),
))

currmas_minlen2_p = currmas_minlen2 / overlap_seq_inorder_minlen2.sum(axis=1, keepdims=True)
currmas_inlen_minlen2_p = currmas_inlen_minlen2 / overlap_seq_inorder_inlen_minlen2.sum(axis=1, keepdims=True)
endmas_minlen2_p = endmas_minlen2 / overlap_seq_inorder_minlen2.sum(axis=1, keepdims=True)
endmas_inlen_minlen2_p = endmas_inlen_minlen2 / overlap_seq_inorder_inlen_minlen2.sum(axis=1, keepdims=True)
/Users/dongjaekim/opt/anaconda3/envs/base37/lib/python3.7/site-packages/ipykernel_launcher.py:12: RuntimeWarning: invalid value encountered in true_divide if sys.path[0] == "": /Users/dongjaekim/opt/anaconda3/envs/base37/lib/python3.7/site-packages/ipykernel_launcher.py:13: RuntimeWarning: invalid value encountered in true_divide del sys.path[0] /Users/dongjaekim/opt/anaconda3/envs/base37/lib/python3.7/site-packages/ipykernel_launcher.py:14: RuntimeWarning: invalid value encountered in true_divide /Users/dongjaekim/opt/anaconda3/envs/base37/lib/python3.7/site-packages/ipykernel_launcher.py:15: RuntimeWarning: invalid value encountered in true_divide from ipykernel import kernelapp as app
# NaN-ignoring across-subject mean of every table, one table per output line.
print(
    np.nanmean(overlap_seq_inorder_p, axis=0),
    np.nanmean(overlap_seq_inorder_inlen_p, axis=0),
    np.nanmean(overlap_seq_inorder_minlen2_p, axis=0),
    np.nanmean(currmas_p, axis=0),
    np.nanmean(currmas_inlen_p, axis=0),
    np.nanmean(endmas_p, axis=0),
    np.nanmean(endmas_inlen_p, axis=0),
    np.nanmean(currmas_minlen2_p, axis=0),
    np.nanmean(currmas_inlen_minlen2_p, axis=0),
    np.nanmean(endmas_minlen2_p, axis=0),
    np.nanmean(endmas_inlen_minlen2_p, axis=0),
    sep='\n',
)
[0.22208629 0.77791371] [0.22208629 0.45150286 0.15166049 0.17475036] [0.36477837 0.63522163] [0.06760932 0.28375106] [0.06760932 0.17354585 0.04429349 0.06591173] [0.08302076 0.38888562] [0.08302076 0.21388566 0.07050334 0.10449662] [0.07980241 0.21998494] [0.07980241 0.08993949 0.13004545] [0.10674695 0.34801712] [0.10674695 0.13866777 0.20934934]
%matplotlib notebook
mpl.rcParams['font.family'] = 'Arial'
mpl.rcParams['font.size'] = 11
plt.figure(figsize=(6,4.5))
plt.hist(length_seq)
plt.title('Histogram of the length of overlapped seqs')
Text(0.5, 1.0, 'Histogram of the length of overlapped seqs')
# Paired Wilcoxon signed-rank test: proportion without overlap (col 0) vs with overlap (col 1).
stat12, p12 = wilcoxon(overlap_seq_inorder_p[:, 0], overlap_seq_inorder_p[:, 1])
print(stat12, p12, sep='\n')
588.0 4.408507196523171e-11
%matplotlib notebook
# Bar chart of the column means of overlap_seq_inorder_p, annotated with p12
# from the Wilcoxon test run just before this cell.
mpl.rcParams.update({'font.family': 'Arial', 'font.size': 11})
fig = plt.figure(figsize=(6, 4.5))
_heights = np.nanmean(overlap_seq_inorder_p, axis=0)
# NOTE(review): the denominator counts NaN rows as well, so this is smaller
# than a standard error over the valid rows only -- confirm this is intended.
_yerr = np.nanstd(overlap_seq_inorder_p, axis=0) / np.sqrt(overlap_seq_inorder_p.shape[0])
bb = plt.bar(range(2), _heights, color=[.7, .7, .7], edgecolor='k', yerr=_yerr)
plt.xticks([0, 1], ['totally different', 'has some overlaps'])
plt.xlabel('comparison path before and after undoing')
# significance bracket spanning the two compared bars
x1, x2 = 0, 1
h, col = 0.05, 'k'
y = np.max([bb[i].get_height() for i in (x1, x2)]) + 0.1
plt.plot([x1, x1, x2, x2], [y, y + h, y + h, y], lw=1.5, c=col)
plt.text((x1 + x2) * .5, y + h, r"$p = {:f}$".format(p12),
         ha='center', va='bottom', color=col, fontsize=8)
fig.savefig(out_dir + 'proportion_undo_different_overlaps.png', dpi=600, bbox_inches='tight')
# Wilcoxon signed-rank test: column 2 vs column 3 of overlap_seq_inorder_inlen_p.
stat34, p34 = wilcoxon(overlap_seq_inorder_inlen_p[:, 2], overlap_seq_inorder_inlen_p[:, 3])
print(stat34, p34, sep='\n')
719.5 7.168079534710966e-05
%matplotlib notebook
# Bar chart of the column means of overlap_seq_inorder_inlen_p (four bars),
# with a significance bracket between bars 2 and 3.
mpl.rcParams.update({'font.family': 'Arial', 'font.size': 11})
plt.figure(figsize=(8, 6))
_heights = np.nanmean(overlap_seq_inorder_inlen_p, axis=0)
_yerr = np.nanstd(overlap_seq_inorder_inlen_p, axis=0) / np.sqrt(overlap_seq_inorder_inlen_p.shape[0])
bb = plt.bar(range(4), _heights, color=[.7, .7, .7], edgecolor='k', yerr=_yerr)
plt.xticks(range(4), ['totally \ndifferent', '1', '2\nlength of the overlapped sequence', '3+'])
plt.xlabel('comparison path before and after undoing')
# significance bracket spanning bars 2 and 3
x1, x2 = 2, 3
h, col = 0.05, 'k'
y = np.max([bb[i].get_height() for i in (x1, x2)]) + 0.1
plt.plot([x1, x1, x2, x2], [y, y + h, y + h, y], lw=1.5, c=col)
# Fix: annotate with p34, the column-2-vs-column-3 test computed just before
# this cell. The original printed p12, a stale value from the earlier
# two-column comparison, and ended the line with a stray backslash.
plt.text((x1 + x2) * .5, y + h, r"$p = {:f}$".format(p34),
         ha='center', va='bottom', color=col, fontsize=8)
Text(2.5, 0.3247503579368857, '$p = 0.000000$')
# Wilcoxon signed-rank test: column 0 vs column 1 of overlap_seq_inorder_minlen2_p.
stat12, p12 = wilcoxon(
    overlap_seq_inorder_minlen2_p[:, 0],
    overlap_seq_inorder_minlen2_p[:, 1],
)
print(stat12, p12, sep='\n')
864.5 2.46882479492761e-07
%matplotlib notebook
# Bar chart of the column means of overlap_seq_inorder_minlen2_p, annotated
# with the p12 computed just before this cell.
mpl.rcParams.update({'font.family': 'Arial', 'font.size': 11})
fig = plt.figure(figsize=(6, 4.5))
_heights = np.nanmean(overlap_seq_inorder_minlen2_p, axis=0)
_yerr = np.nanstd(overlap_seq_inorder_minlen2_p, axis=0) / np.sqrt(overlap_seq_inorder_minlen2_p.shape[0])
bb = plt.bar(range(2), _heights, color=[.7, .7, .7], edgecolor='k', yerr=_yerr)
plt.xticks([0, 1], ['totally different', 'has some overlaps'])  # length >= 2
plt.xlabel('comparison path before and after undoing')
# significance bracket spanning the two compared bars
x1, x2 = 0, 1
h, col = 0.05, 'k'
y = np.max([bb[i].get_height() for i in (x1, x2)]) + 0.1
plt.plot([x1, x1, x2, x2], [y, y + h, y + h, y], lw=1.5, c=col)
plt.text((x1 + x2) * .5, y + h, r"$p = {:f}$".format(p12),
         ha='center', va='bottom', color=col, fontsize=8)
fig.savefig(out_dir + 'proportion_undo_different_overlaps_len2.png', dpi=600, bbox_inches='tight')
# Wilcoxon signed-rank test: column 1 vs column 2 of
# overlap_seq_inorder_inlen_minlen2_p.
stat12, p12 = wilcoxon(
    overlap_seq_inorder_inlen_minlen2_p[:, 1],
    overlap_seq_inorder_inlen_minlen2_p[:, 2],
)
print(stat12, p12, sep='\n')
701.5 1.3501305434343351e-06
%matplotlib notebook
# Three-bar chart of the column means of overlap_seq_inorder_inlen_minlen2_p
# with a bracket between bars 1 and 2 (p12 from the test just before).
mpl.rcParams.update({'font.family': 'Arial', 'font.size': 11})
fig = plt.figure(figsize=(8, 6))
_heights = np.nanmean(overlap_seq_inorder_inlen_minlen2_p, axis=0)
_yerr = (np.nanstd(overlap_seq_inorder_inlen_minlen2_p, axis=0)
         / np.sqrt(overlap_seq_inorder_inlen_minlen2_p.shape[0]))
bb = plt.bar(range(3), _heights, color=[.7, .7, .7], edgecolor='k', yerr=_yerr)
# the tick at 1.5 only carries the shared caption for bars 1 and 2
plt.xticks([0, 1, 1.5, 2], ['totally \ndifferent', '2', '\nlength of the overlapped sequence', '3+'])
plt.xlabel('comparison path before and after undoing')
# significance bracket spanning bars 1 and 2
x1, x2 = 1, 2
h, col = 0.05, 'k'
y = np.max([bb[i].get_height() for i in (x1, x2)]) + 0.1
plt.plot([x1, x1, x2, x2], [y, y + h, y + h, y], lw=1.5, c=col)
plt.text((x1 + x2) * .5, y + h, r"$p = {:f}$".format(p12),
         ha='center', va='bottom', color=col, fontsize=8)
fig.savefig(out_dir + 'proportion_undo_different_overlaps_bylen_len2.png', dpi=600, bbox_inches='tight')

(Figure to aid understanding) Difference in maximum achievable score (MAS) after (af) vs. before (bf) an undo
## check variance: sample SD (ddof=1) of each column, NaNs ignored
for _col in (0, 1):
    print(np.nanstd(currmas_p[:, _col], ddof=1))
## check normality: Q-Q plot of the non-NaN entries of each column
for _col in (0, 1):
    _valid = ~np.isnan(currmas_p[:, _col])
    sm.qqplot(currmas_p[_valid, _col], line='s')
    py.show()
0.3554695787846406 0.3401033696614426
# Rows where column 0 of currmas_p is defined (not NaN).
idx = np.where(~np.isnan(currmas_p[:, 0]))[0]
# NOTE(review): ttest_ind treats the two columns as independent samples even
# though both are read from the same rows -- confirm ttest_rel was not meant.
stat12, p12 = ttest_ind(currmas_p[idx, 0], currmas_p[idx, 1])
print(stat12, p12, sep='\n')
-4.167986165569915 4.789498712713116e-05
# Wilcoxon signed-rank test: column 0 vs column 1 of currmas_p.
stat12, p12 = wilcoxon(currmas_p[:, 0], currmas_p[:, 1])
print(stat12, p12, sep='\n')
613.0 3.66918192925141e-10
%matplotlib notebook
# Bar chart of the column means of currmas_p with the Wilcoxon p-value (p12).
mpl.rcParams.update({'font.family': 'Arial', 'font.size': 11})
fig = plt.figure(figsize=(6, 4.5))
_heights = np.nanmean(currmas_p, axis=0)
_yerr = np.nanstd(currmas_p, axis=0) / np.sqrt(currmas_p.shape[0])
bb = plt.bar(range(2), _heights, color=[.7, .7, .7], edgecolor='k', yerr=_yerr)
plt.xticks([0, 1], ['totally different', 'has some overlaps'])
# (an alternative x-label describing the same quantity was left disabled here)
plt.ylabel('Maximum achivable score difference around branching city')
# significance bracket spanning the two compared bars
x1, x2 = 0, 1
h, col = 0.05, 'k'
y = np.max([bb[i].get_height() for i in (x1, x2)]) + 0.1
plt.plot([x1, x1, x2, x2], [y, y + h, y + h, y], lw=1.5, c=col)
plt.text((x1 + x2) * .5, y + h, r"$p = {:f}$".format(p12),
         ha='center', va='bottom', color=col, fontsize=8)
fig.savefig(out_dir + 'proportion_mas_different_overlaps.png', dpi=600, bbox_inches='tight')
## check variance: sample SD (ddof=1) of columns 2 and 3, NaNs ignored
for _col in (2, 3):
    print(np.nanstd(currmas_inlen_p[:, _col], ddof=1))
## check normality: Q-Q plot of the non-NaN entries of columns 2 and 3
# Fix: the original built the NaN mask from columns 2 and 3 but then plotted
# columns 0 and 1, so the Q-Q plots did not show the columns being tested.
for _col in (2, 3):
    _valid = ~np.isnan(currmas_inlen_p[:, _col])
    sm.qqplot(currmas_inlen_p[_valid, _col], line='s')
    py.show()
0.08867834829644963 0.10264900388789816
# Wilcoxon signed-rank test: column 2 vs column 3 of currmas_inlen_p.
stat23, p23 = wilcoxon(currmas_inlen_p[:, 2], currmas_inlen_p[:, 3])
print(stat23, p23, sep='\n')
716.0 0.00010992678664051827
%matplotlib notebook
# Four-bar chart of the column means of currmas_inlen_p with a bracket
# between bars 2 and 3 (p23 from the test just before).
mpl.rcParams.update({'font.family': 'Arial', 'font.size': 11})
plt.figure(figsize=(8, 6))
_heights = np.nanmean(currmas_inlen_p, axis=0)
_yerr = np.nanstd(currmas_inlen_p, axis=0) / np.sqrt(currmas_inlen_p.shape[0])
bb = plt.bar(range(4), _heights, color=[.7, .7, .7], edgecolor='k', yerr=_yerr)
plt.xticks(range(4), ['totally \ndifferent', '1', '2\nlength of the overlapped sequence', '3+'])
plt.xlabel('mas difference between child cities of branching city af and bf undo')
# significance bracket spanning bars 2 and 3
x1, x2 = 2, 3
h, col = 0.005, 'k'
y = np.max([bb[i].get_height() for i in (x1, x2)]) + 0.05
plt.plot([x1, x1, x2, x2], [y, y + h, y + h, y], lw=1.5, c=col)
plt.text((x1 + x2) * .5, y + h, r"$p = {:f}$".format(p23),
         ha='center', va='bottom', color=col, fontsize=8)
Text(2.5, 0.12091173000959742, '$p = 0.000110$')
This means that the new path is better when it has some overlap with the old path.
## check variance: sample SD (ddof=1) of each column, NaNs ignored
for _col in (0, 1):
    print(np.nanstd(currmas_minlen2_p[:, _col], ddof=1))
## check normality: Q-Q plot of the non-NaN entries of each column
for _col in (0, 1):
    _valid = ~np.isnan(currmas_minlen2_p[:, _col])
    sm.qqplot(currmas_minlen2_p[_valid, _col], line='s')
    py.show()
0.40523739028006 0.26013279783163734
# Wilcoxon signed-rank test: column 0 vs column 1 of currmas_minlen2_p.
stat12, p12 = wilcoxon(currmas_minlen2_p[:, 0], currmas_minlen2_p[:, 1])
print(stat12, p12, sep='\n')
702.0 7.857020484842883e-09
%matplotlib notebook
# Bar chart of the column means of currmas_minlen2_p with the Wilcoxon
# p-value (p12) drawn as a bracket.
mpl.rcParams.update({'font.family': 'Arial', 'font.size': 11})
fig = plt.figure(figsize=(6, 4.5))
_heights = np.nanmean(currmas_minlen2_p, axis=0)
_yerr = np.nanstd(currmas_minlen2_p, axis=0) / np.sqrt(currmas_minlen2_p.shape[0])
bb = plt.bar(range(2), _heights, color=[.7, .7, .7], edgecolor='k', yerr=_yerr)
plt.xticks([0, 1], ['totally different', 'has some overlaps'])  # (include len>=2)
plt.xlabel('mas difference between child cities of branching city af and bf undo')
# significance bracket spanning the two compared bars
x1, x2 = 0, 1
h, col = 0.05, 'k'
y = np.max([bb[i].get_height() for i in (x1, x2)]) + 0.1
plt.plot([x1, x1, x2, x2], [y, y + h, y + h, y], lw=1.5, c=col)
plt.text((x1 + x2) * .5, y + h, r"$p = {:f}$".format(p12),
         ha='center', va='bottom', color=col, fontsize=8)
fig.savefig(out_dir + 'proportion_mas_different_overlaps_len2.png', dpi=600, bbox_inches='tight')
# t-test between columns 0 and 1 of currmas_inlen_minlen2_p, restricted to
# the rows where column 0 is not NaN.
# NOTE(review): the two columns are read from the same rows; confirm an
# independent-samples test (rather than ttest_rel) is intended.
_valid = ~np.isnan(currmas_inlen_minlen2_p[:, 0])
stat12, p12 = ttest_ind(currmas_inlen_minlen2_p[_valid, 0], currmas_inlen_minlen2_p[_valid, 1])
print(stat12, p12, sep='\n')
-0.21086634567790852 0.8332533464873775
# Wilcoxon signed-rank tests on adjacent columns of currmas_inlen_minlen2_p:
# 0 vs 1 (stat12/p12) and 1 vs 2 (stat23/p23).
stat12, p12 = wilcoxon(currmas_inlen_minlen2_p[:, 0], currmas_inlen_minlen2_p[:, 1])
stat23, p23 = wilcoxon(currmas_inlen_minlen2_p[:, 1], currmas_inlen_minlen2_p[:, 2])
print(stat12, p12, stat23, p23, sep='\n')
997.5 1.4510418096217376e-05 730.0 4.24013043843014e-06
%matplotlib notebook
# Three-bar chart of the column means of currmas_inlen_minlen2_p with two
# brackets: bars 0-1 (p12) and bars 1-2 (p23).
mpl.rcParams.update({'font.family': 'Arial', 'font.size': 11})
fig = plt.figure(figsize=(8, 6))
_heights = np.nanmean(currmas_inlen_minlen2_p, axis=0)
_yerr = np.nanstd(currmas_inlen_minlen2_p, axis=0) / np.sqrt(currmas_inlen_minlen2_p.shape[0])
bb = plt.bar(range(3), _heights, color=[.7, .7, .7], edgecolor='k', yerr=_yerr)
# the tick at 1.5 only carries the shared caption for bars 1 and 2
plt.xticks([0, 1, 1.5, 2], ['totally \ndifferent', '2', '\nlength of the overlapped sequence', '3+'])
plt.xlabel('mas difference between child cities of branching city af and bf undo')
# one significance bracket per compared pair of bars
for (x1, x2), _p in (((0, 1), p12), ((1, 2), p23)):
    h, col = 0.02, 'k'
    y = np.max([bb[i].get_height() for i in (x1, x2)]) + 0.05
    plt.plot([x1, x1, x2, x2], [y, y + h, y + h, y], lw=1.5, c=col)
    plt.text((x1 + x2) * .5, y + h, r"$p = {:f}$".format(_p),
             ha='center', va='bottom', color=col, fontsize=8)
fig.savefig(out_dir + 'proportion_mas_different_overlaps_bylen_len2.png', dpi=600, bbox_inches='tight')
This means that the new path is better when it has some overlap with the old path.

(Figure to aid understanding) Difference in maximum achievable score (MAS) after (af) vs. before (bf) an undo
## check variance: sample SD (ddof=1) of each column, NaNs ignored
for _col in (0, 1):
    print(np.nanstd(endmas_p[:, _col], ddof=1))
## check normality: Q-Q plot of the non-NaN entries of each column
for _col in (0, 1):
    _valid = ~np.isnan(endmas_p[:, _col])
    sm.qqplot(endmas_p[_valid, _col], line='s')
    py.show()
0.46010317708591597 0.3441129564856905
# Wilcoxon signed-rank test: column 0 vs column 1 of endmas_p.
stat12, p12 = wilcoxon(endmas_p[:, 0], endmas_p[:, 1])
print(stat12, p12, sep='\n')
444.5 5.858228574182619e-12
%matplotlib notebook
# Bar chart of the column means of endmas_p with the Wilcoxon p-value (p12).
mpl.rcParams.update({'font.family': 'Arial', 'font.size': 11})
plt.figure(figsize=(6, 4.5))
_heights = np.nanmean(endmas_p, axis=0)
_yerr = np.nanstd(endmas_p, axis=0) / np.sqrt(endmas_p.shape[0])
bb = plt.bar(range(2), _heights, color=[.7, .7, .7], edgecolor='k', yerr=_yerr)
plt.xticks([0, 1], ['totally different', 'has some overlaps'])
plt.xlabel('mas difference between cities at the end af and bf undo')
# significance bracket spanning the two compared bars
x1, x2 = 0, 1
h, col = 0.05, 'k'
y = np.max([bb[i].get_height() for i in (x1, x2)]) + 0.1
plt.plot([x1, x1, x2, x2], [y, y + h, y + h, y], lw=1.5, c=col)
plt.text((x1 + x2) * .5, y + h, r"$p = {:f}$".format(p12),
         ha='center', va='bottom', color=col, fontsize=8)
Text(0.5, 0.5388856241387392, '$p = 0.000000$')
# Wilcoxon signed-rank test: column 2 vs column 3 of endmas_inlen_p
# (stored under the names stat12/p12).
stat12, p12 = wilcoxon(endmas_inlen_p[:, 2], endmas_inlen_p[:, 3])
print(stat12, p12, sep='\n')
539.0 1.7191433666615128e-06
%matplotlib notebook
# Four-bar chart of the column means of endmas_inlen_p with a bracket between
# bars 2 and 3; p12 here holds the column-2-vs-3 test run just before.
plt.figure()
_heights = np.nanmean(endmas_inlen_p, axis=0)
_yerr = np.nanstd(endmas_inlen_p, axis=0) / np.sqrt(endmas_inlen_p.shape[0])
bb = plt.bar(range(4), _heights, color=[.7, .7, .7], edgecolor='k', yerr=_yerr)
plt.xticks(range(4), ['totally \ndifferent', 'len=1', 'len=2', 'len>=3'])
plt.xlabel('mas difference between cities at the end af and bf undo')
# significance bracket spanning bars 2 and 3
x1, x2 = 2, 3
h, col = 0.05, 'k'
y = np.max([bb[i].get_height() for i in (x1, x2)]) + 0.1
plt.plot([x1, x1, x2, x2], [y, y + h, y + h, y], lw=1.5, c=col)
plt.text((x1 + x2) * .5, y + h, r"$p = {:f}$".format(p12),
         ha='center', va='bottom', color=col, fontsize=8)
Text(2.5, 0.2544966193247388, '$p = 0.000002$')
# Wilcoxon signed-rank test: column 0 vs column 1 of endmas_minlen2_p.
stat12, p12 = wilcoxon(endmas_minlen2_p[:, 0], endmas_minlen2_p[:, 1])
print(stat12, p12, sep='\n')
531.5 2.334585044941607e-10
%matplotlib notebook
# Bar chart of the column means of endmas_minlen2_p with the Wilcoxon
# p-value (p12) drawn as a bracket.
mpl.rcParams.update({'font.family': 'Arial', 'font.size': 11})
plt.figure(figsize=(6, 4.5))
_heights = np.nanmean(endmas_minlen2_p, axis=0)
_yerr = np.nanstd(endmas_minlen2_p, axis=0) / np.sqrt(endmas_minlen2_p.shape[0])
bb = plt.bar(range(2), _heights, color=[.7, .7, .7], edgecolor='k', yerr=_yerr)
plt.xticks([0, 1], ['totally different', 'has some overlaps'])  # (includes len>=2)
plt.xlabel('mas difference between cities at the end af and bf undo')
# significance bracket spanning the two compared bars
x1, x2 = 0, 1
h, col = 0.05, 'k'
y = np.max([bb[i].get_height() for i in (x1, x2)]) + 0.1
plt.plot([x1, x1, x2, x2], [y, y + h, y + h, y], lw=1.5, c=col)
plt.text((x1 + x2) * .5, y + h, r"$p = {:f}$".format(p12),
         ha='center', va='bottom', color=col, fontsize=8)
Text(0.5, 0.498017117274624, '$p = 0.000000$')
# Wilcoxon signed-rank tests on adjacent columns of endmas_inlen_minlen2_p:
# 0 vs 1 (stat12/p12) and 1 vs 2 (stat23/p23).
stat12, p12 = wilcoxon(endmas_inlen_minlen2_p[:, 0], endmas_inlen_minlen2_p[:, 1])
stat23, p23 = wilcoxon(endmas_inlen_minlen2_p[:, 1], endmas_inlen_minlen2_p[:, 2])
print(stat12, p12, stat23, p23, sep='\n')
976.5 5.988959268758323e-06 536.5 4.308377997714307e-08
%matplotlib notebook
# Three-bar chart of the column means of endmas_inlen_minlen2_p with two
# brackets: bars 0-1 (p12) and bars 1-2 (p23).
plt.figure()
_heights = np.nanmean(endmas_inlen_minlen2_p, axis=0)
_yerr = np.nanstd(endmas_inlen_minlen2_p, axis=0) / np.sqrt(endmas_inlen_minlen2_p.shape[0])
bb = plt.bar(range(3), _heights, color=[.7, .7, .7], edgecolor='k', yerr=_yerr)
plt.xticks(range(3), ['totally \ndifferent', 'len=2', 'len>=3'])
plt.xlabel('mas difference between cities at the end af and bf undo')
# bracket: bar 0 vs bar 1
x1, x2 = 0, 1
h, col = 0.02, 'k'
y = np.max([bb[i].get_height() for i in (x1, x2)]) + 0.05
plt.plot([x1, x1, x2, x2], [y, y + h, y + h, y], lw=1.5, c=col)
plt.text((x1 + x2) * .5, y + h, r"$p = {:f}$".format(p12),
         ha='center', va='bottom', color=col, fontsize=8)
# bracket: bar 1 vs bar 2
x1, x2 = 1, 2
h, col = 0.02, 'k'
y = np.max([bb[i].get_height() for i in (x1, x2)]) + 0.05
plt.plot([x1, x1, x2, x2], [y, y + h, y + h, y], lw=1.5, c=col)
plt.text((x1 + x2) * .5, y + h, r"$p = {:f}$".format(p23),
         ha='center', va='bottom', color=col, fontsize=8)
Text(1.5, 0.27934934369358494, '$p = 0.000000$')
# Derive the tables that ignore column 1 of currnos_inlen.
_keep = np.setdiff1d(range(currnos_inlen.shape[1]), 1)  # every column except 1
currnos_inlen_minlen2 = currnos_inlen[:, _keep]
# two-column version: column 0 kept, remaining columns summed together
currnos_minlen2 = np.array([
    currnos_inlen_minlen2[:, 0],
    np.sum(currnos_inlen_minlen2[:, 1:], axis=1),
]).transpose()
# normalise each row by the matching row total of the overlap-count tables
# (rows whose total is 0 become NaN and trigger the RuntimeWarning)
currnos_minlen2_p = currnos_minlen2 / np.sum(overlap_seq_inorder_minlen2, axis=1)[:, None]
currnos_inlen_minlen2_p = currnos_inlen_minlen2 / np.sum(overlap_seq_inorder_inlen_minlen2, axis=1)[:, None]
/Users/dongjaekim/opt/anaconda3/envs/base37/lib/python3.7/site-packages/ipykernel_launcher.py:7: RuntimeWarning: invalid value encountered in true_divide import sys /Users/dongjaekim/opt/anaconda3/envs/base37/lib/python3.7/site-packages/ipykernel_launcher.py:8: RuntimeWarning: invalid value encountered in true_divide
## check variance: sample SD (ddof=1) of each column, NaNs ignored
for _col in (0, 1):
    print(np.nanstd(currnos_minlen2_p[:, _col], ddof=1))
## check normality: Q-Q plot of the non-NaN entries of each column
for _col in (0, 1):
    _valid = ~np.isnan(currnos_minlen2_p[:, _col])
    sm.qqplot(currnos_minlen2_p[_valid, _col], line='s')
    py.show()
1.385248629690381 7.83873735380135
# t-test between columns 0 and 1 of currnos_minlen2_p, restricted to the rows
# where column 0 is not NaN.
# NOTE(review): the two columns are read from the same rows; confirm an
# independent-samples test (rather than ttest_rel) is intended.
_valid = ~np.isnan(currnos_minlen2_p[:, 0])
stat12, p12 = ttest_ind(currnos_minlen2_p[_valid, 0], currnos_minlen2_p[_valid, 1])
print(stat12, p12, sep='\n')
2.6869335886670407 0.007954071465558715
# Wilcoxon signed-rank test: column 0 vs column 1 of currnos_minlen2_p.
stat12, p12 = wilcoxon(currnos_minlen2_p[:, 0], currnos_minlen2_p[:, 1])
print(stat12, p12, sep='\n')
974.0 4.498452277893566e-07
%matplotlib notebook
# Bar chart of the column means of currnos_minlen2_p with the Wilcoxon
# p-value (p12) drawn as a bracket.
mpl.rcParams.update({'font.family': 'Arial', 'font.size': 11})
fig = plt.figure(figsize=(6, 4.5))
_heights = np.nanmean(currnos_minlen2_p, axis=0)
_yerr = np.nanstd(currnos_minlen2_p, axis=0) / np.sqrt(currnos_minlen2_p.shape[0])
bb = plt.bar(range(2), _heights, color=[.7, .7, .7], edgecolor='k', yerr=_yerr)
plt.xticks([0, 1], ['totally different', 'has some overlaps'])
plt.xlabel('nos difference between cities at the end af and bf undo')
# significance bracket spanning the two compared bars
x1, x2 = 0, 1
h, col = 0.02, 'k'
y = np.max([bb[i].get_height() for i in (x1, x2)]) + 0.1
plt.plot([x1, x1, x2, x2], [y, y + h, y + h, y], lw=1.5, c=col)
plt.text((x1 + x2) * .5, y + h, r"$p = {:f}$".format(p12),
         ha='center', va='bottom', color=col, fontsize=8)
fig.savefig(out_dir + 'proportion_nos_different_overlaps_len2.png', dpi=600, bbox_inches='tight')
# Wilcoxon signed-rank test: column 1 vs column 2 of currnos_inlen_minlen2_p
# (stored under the names stat12/p12).
stat12, p12 = wilcoxon(currnos_inlen_minlen2_p[:, 1], currnos_inlen_minlen2_p[:, 2])
print(stat12, p12, sep='\n')
1112.5 0.0004347880090769754
%matplotlib notebook
# Three-bar chart of the column means of currnos_inlen_minlen2_p. Only the
# bracket between bars 1 and 2 is drawn (the 0-vs-1 bracket was disabled);
# p12 here holds the column-1-vs-2 test run just before.
fig = plt.figure()
_heights = np.nanmean(currnos_inlen_minlen2_p, axis=0)
_yerr = np.nanstd(currnos_inlen_minlen2_p, axis=0) / np.sqrt(currnos_inlen_minlen2_p.shape[0])
bb = plt.bar(range(3), _heights, color=[.7, .7, .7], edgecolor='k', yerr=_yerr)
plt.xticks(range(3), ['totally \ndifferent', 'len=2', 'len>=3'])
plt.xlabel('nos difference between cities at the end af and bf undo')
# significance bracket at a fixed height, spanning bars 1 and 2
x1, x2 = 1, 2
y = 0.05
h = 0.02
col = 'k'
plt.plot([x1, x1, x2, x2], [y, y + h, y + h, y], lw=1.5, c=col)
plt.text((x1 + x2) * .5, y + h, r"$p = {:f}$".format(p12),
         ha='center', va='bottom', color=col, fontsize=8)
fig.savefig(out_dir + 'proportion_nos_different_overlaps_bylen_len2.png', dpi=600, bbox_inches='tight')
# Derive the tables that ignore column 1 of leftover_inlen.
_keep = np.setdiff1d(range(leftover_inlen.shape[1]), 1)  # every column except 1
leftover_inlen_minlen2 = leftover_inlen[:, _keep]
# two-column version: column 0 kept, remaining columns summed together
leftover_minlen2 = np.array([
    leftover_inlen_minlen2[:, 0],
    np.sum(leftover_inlen_minlen2[:, 1:], axis=1),
]).transpose()
# normalise each row by the matching row total of the overlap-count tables
# (rows whose total is 0 become NaN and trigger the RuntimeWarning)
leftover_minlen2_p = leftover_minlen2 / np.sum(overlap_seq_inorder_minlen2, axis=1)[:, None]
leftover_inlen_minlen2_p = leftover_inlen_minlen2 / np.sum(overlap_seq_inorder_inlen_minlen2, axis=1)[:, None]
/Users/dongjaekim/opt/anaconda3/envs/base37/lib/python3.7/site-packages/ipykernel_launcher.py:7: RuntimeWarning: invalid value encountered in true_divide import sys /Users/dongjaekim/opt/anaconda3/envs/base37/lib/python3.7/site-packages/ipykernel_launcher.py:8: RuntimeWarning: invalid value encountered in true_divide
# Column-wise means (NaNs ignored) of the two leftover tables.
print(
    np.nanmean(leftover_minlen2_p, axis=0),
    np.nanmean(leftover_inlen_minlen2_p, axis=0),
    sep='\n',
)
[-11.01847968 -11.14767781] [-11.01847968 -5.00073805 -6.14693976]
# Wilcoxon signed-rank test: column 0 vs column 1 of leftover_minlen2_p.
stat12, p12 = wilcoxon(leftover_minlen2_p[:, 0], leftover_minlen2_p[:, 1])
print(stat12, p12, sep='\n')
1316.0 3.225460839514926e-05
%matplotlib notebook
# Bar chart of the column means of leftover_minlen2_p; the bracket is drawn
# at a fixed height and labelled with p12 from the test just before.
mpl.rcParams.update({'font.family': 'Arial', 'font.size': 11})
plt.figure(figsize=(6, 4.5))
_heights = np.nanmean(leftover_minlen2_p, axis=0)
_yerr = np.nanstd(leftover_minlen2_p, axis=0) / np.sqrt(leftover_minlen2_p.shape[0])
bb = plt.bar(range(2), _heights, color=[.7, .7, .7], edgecolor='k', yerr=_yerr)
plt.xticks([0, 1], ['totally different', 'has some overlaps'])
plt.xlabel('leftover difference between cities at the end af and bf undo')
# significance bracket at a fixed height
x1, x2 = 0, 1
y = 4
h = 1
col = 'k'
plt.plot([x1, x1, x2, x2], [y, y + h, y + h, y], lw=1.5, c=col)
plt.text((x1 + x2) * .5, y + h, r"$p = {:f}$".format(p12),
         ha='center', va='bottom', color=col, fontsize=8)
Text(0.5, 5, '$p = 0.000032$')
%matplotlib notebook
# Three-bar chart of the column means of leftover_inlen_minlen2_p with
# brackets for bars 0-1 and bars 1-2.
#
# Fix: no test was ever run on leftover_inlen_minlen2_p. The original labelled
# the brackets with whatever p12 / p23 were left over from earlier cells (the
# two-column leftover test and the endmas test). Compute the matching
# Wilcoxon signed-rank tests here so the printed p-values belong to the bars.
stat12, p12 = wilcoxon(leftover_inlen_minlen2_p[:, 0], leftover_inlen_minlen2_p[:, 1])
stat23, p23 = wilcoxon(leftover_inlen_minlen2_p[:, 1], leftover_inlen_minlen2_p[:, 2])

fig = plt.figure()
_heights = np.nanmean(leftover_inlen_minlen2_p, axis=0)
_yerr = np.nanstd(leftover_inlen_minlen2_p, axis=0) / np.sqrt(leftover_inlen_minlen2_p.shape[0])
bb = plt.bar(range(3), _heights, color=[.7, .7, .7], edgecolor='k', yerr=_yerr)
plt.xticks(range(3), ['totally \ndifferent', 'len=2', 'len>=3'])
plt.xlabel('leftover difference between cities at the end af and bf undo')
# brackets are drawn at a fixed height, one per compared pair of bars
for (x1, x2), _p in (((0, 1), p12), ((1, 2), p23)):
    y, h, col = 1, 0.02, 'k'
    plt.plot([x1, x1, x2, x2], [y, y + h, y + h, y], lw=1.5, c=col)
    plt.text((x1 + x2) * .5, y + h, r"$p = {:f}$".format(_p),
             ha='center', va='bottom', color=col, fontsize=8)
fig.savefig(out_dir + 'proportion_leftover_different_overlaps_bylen_len2.png', dpi=600, bbox_inches='tight')
# Derive the tables that ignore column 1 of RT_inlen.
_keep = np.setdiff1d(range(RT_inlen.shape[1]), 1)  # every column except 1
RT_inlen_minlen2 = RT_inlen[:, _keep]
# two-column version: column 0 kept, remaining columns summed together
RT_minlen2 = np.array([
    RT_inlen_minlen2[:, 0],
    np.sum(RT_inlen_minlen2[:, 1:], axis=1),
]).transpose()
# normalise each row by the matching row total of the overlap-count tables,
# then divide by 1000 (presumably ms -> s; confirm the unit of RT_inlen)
RT_minlen2_p = (RT_minlen2 / np.sum(overlap_seq_inorder_minlen2, axis=1)[:, None]) / 1000
RT_inlen_minlen2_p = (RT_inlen_minlen2 / np.sum(overlap_seq_inorder_inlen_minlen2, axis=1)[:, None]) / 1000
/Users/dongjaekim/opt/anaconda3/envs/base37/lib/python3.7/site-packages/ipykernel_launcher.py:7: RuntimeWarning: invalid value encountered in true_divide import sys /Users/dongjaekim/opt/anaconda3/envs/base37/lib/python3.7/site-packages/ipykernel_launcher.py:8: RuntimeWarning: invalid value encountered in true_divide
# Column-wise means (NaNs ignored) of the two RT tables.
print(
    np.nanmean(RT_minlen2_p, axis=0),
    np.nanmean(RT_inlen_minlen2_p, axis=0),
    sep='\n',
)
[0.12435565 1.2240461 ] [0.12435565 0.52759115 0.69645495]
# Wilcoxon signed-rank test: column 0 vs column 1 of RT_minlen2_p.
stat12, p12 = wilcoxon(RT_minlen2_p[:, 0], RT_minlen2_p[:, 1])
print(stat12, p12, sep='\n')
229.0 2.917295855137348e-15
%matplotlib notebook
# Bar chart of the column means of RT_minlen2_p with the Wilcoxon p-value
# (p12) drawn as a bracket.
mpl.rcParams.update({'font.family': 'Arial', 'font.size': 11})
plt.figure(figsize=(6, 4.5))
_heights = np.nanmean(RT_minlen2_p, axis=0)
_yerr = np.nanstd(RT_minlen2_p, axis=0) / np.sqrt(RT_minlen2_p.shape[0])
bb = plt.bar(range(2), _heights, color=[.7, .7, .7], edgecolor='k', yerr=_yerr)
plt.xticks([0, 1], ['totally different', 'has some overlaps'])
plt.xlabel('RT difference between cities at the end af and bf undo')
# significance bracket spanning the two compared bars
x1, x2 = 0, 1
h, col = 0.02, 'k'
y = np.max([bb[i].get_height() for i in (x1, x2)]) + 0.2
plt.plot([x1, x1, x2, x2], [y, y + h, y + h, y], lw=1.5, c=col)
plt.text((x1 + x2) * .5, y + h, r"$p = {:f}$".format(p12),
         ha='center', va='bottom', color=col, fontsize=8)
Text(0.5, 1.4440460968260245, '$p = 0.000000$')
# Wilcoxon signed-rank tests on RT_inlen_minlen2_p: column 0 vs 1 (stat12/p12)
# and column 2 vs 1 (stat23/p23). stat23 is computed but not printed.
stat12, p12 = wilcoxon(RT_inlen_minlen2_p[:, 0], RT_inlen_minlen2_p[:, 1])
stat23, p23 = wilcoxon(RT_inlen_minlen2_p[:, 2], RT_inlen_minlen2_p[:, 1])
print(stat12, p12, p23, sep='\n')
705.0 3.905682059423199e-10 2.3376789112987052e-06
%matplotlib notebook
# Three-bar chart of the column means of RT_inlen_minlen2_p with brackets for
# bars 0-1 (p12) and bars 1-2 (p23), both from the tests run just before.
plt.figure()
_heights = np.nanmean(RT_inlen_minlen2_p, axis=0)
_yerr = np.nanstd(RT_inlen_minlen2_p, axis=0) / np.sqrt(RT_inlen_minlen2_p.shape[0])
bb = plt.bar(range(3), _heights, color=[.7, .7, .7], edgecolor='k', yerr=_yerr)
plt.xticks(range(3), ['totally \ndifferent', 'len=2', 'len>=3'])
# Fix: this figure shows RT, but the axis label had been copied from the
# leftover plot ('leftover difference ...'). Use the RT wording of the
# two-bar RT figure instead.
plt.xlabel('RT difference between cities at the end af and bf undo')
# bracket at a fixed height: bar 0 vs bar 1
x1, x2 = 0, 1
y, h, col = 1, 0.02, 'k'
plt.plot([x1, x1, x2, x2], [y, y + h, y + h, y], lw=1.5, c=col)
plt.text((x1 + x2) * .5, y + h, r"$p = {:f}$".format(p12),
         ha='center', va='bottom', color=col, fontsize=8)
# bracket at a fixed height: bar 1 vs bar 2
x1, x2 = 1, 2
y, h, col = 1, 0.02, 'k'
plt.plot([x1, x1, x2, x2], [y, y + h, y + h, y], lw=1.5, c=col)
plt.text((x1 + x2) * .5, y + h, r"$p = {:f}$".format(p23),
         ha='center', va='bottom', color=col, fontsize=8)
Text(1.5, 1.02, '$p = 0.000002$')
import copy
_BASIC_MAP_CACHE = {}


def _load_basic_map(map_path='./util/basicMap.json'):
    """Read the puzzle-map JSON once per path and reuse it on later calls."""
    if map_path not in _BASIC_MAP_CACHE:
        import json
        with open(map_path, 'rb') as f:
            _BASIC_MAP_CACHE[map_path] = json.load(f)
    return _BASIC_MAP_CACHE[map_path]


def get_tortuosity(pathList, pzi=15, basic_map=None):
    """Return the travelled and straight-line length of each path.

    Parameters
    ----------
    pathList : sequence of sequences of city indices. The input is only read,
        never modified.
    pzi : index of the puzzle inside the map collection.
    basic_map : optional, already-loaded map collection; ``basic_map[pzi]['xy']``
        must give the coordinates of each city. When omitted, the collection is
        read from ``./util/basicMap.json`` (once, then cached).

    Returns
    -------
    numpy.ndarray of shape ``(len(pathList), 2)``. Each row is
    ``[d_zigzag, d_straight]``: the summed Euclidean length of consecutive
    steps, and the Euclidean distance from the first to the last city. Paths
    with fewer than two cities give ``[nan, nan]``.

    Changes from the previous version: the JSON file is no longer re-read on
    every call, the function-local ``import numpy`` (which shadowed the
    module-level ``np``) is gone, paths are walked without deepcopy/``pop(0)``,
    and an empty ``pathList`` now yields a ``(0, 2)`` array so that column
    indexing by callers keeps working.
    """
    if basic_map is None:
        basic_map = _load_basic_map()
    city_xy = basic_map[pzi]['xy']

    def cal_dist(xy1, xy2):
        # Euclidean distance between two coordinate pairs
        return np.sqrt(np.sum((np.array(xy1) - np.array(xy2)) ** 2))

    tortuosity = []
    for path in pathList:
        if len(path) > 1:
            d_straight = cal_dist(city_xy[path[0]], city_xy[path[-1]])
            d_zigzag = 0
            for ct_init, ct_tgt in zip(path[:-1], path[1:]):
                d_zigzag += cal_dist(city_xy[ct_init], city_xy[ct_tgt])
            tortuosity.append([d_zigzag, d_straight])
        else:
            tortuosity.append([np.nan, np.nan])
    # reshape keeps the two-column layout even when there are no paths
    return np.array(tortuosity, dtype=float).reshape(-1, 2)
import difflib
def get_overlap(s1, s2):
    """Return the longest contiguous block of ``s1`` that also occurs in ``s2``.

    The block is located with ``difflib.SequenceMatcher.find_longest_match``
    over the full extent of both sequences and returned as a slice of ``s1``.
    """
    matcher = difflib.SequenceMatcher(None, s1, s2)
    longest = matcher.find_longest_match(0, len(s1), 0, len(s2))
    start = longest[0]
    return s1[start:start + longest[2]]
def get_overlaplist(l1, l2, minlen=1):
    """List the distinct common contiguous runs of ``l1`` and ``l2``.

    For every pair of positions holding the same element, the run is extended
    for as long as both sequences keep agreeing. The collected runs are then
    reduced by ``trimmer`` (runs contained in another kept run are dropped,
    runs shorter than ``minlen`` are removed).

    The filtering used to be a verbatim copy of ``trimmer``'s body; it now
    delegates to ``trimmer`` so the two cannot drift apart.

    Parameters
    ----------
    l1, l2 : indexable sequences with comparable elements.
    minlen : minimum length a run must have to be returned.

    Returns
    -------
    list of lists, ordered as produced by ``trimmer``.
    """
    matched_seq = []
    for i1 in range(len(l1)):
        # every position of l2 that holds the same element as l1[i1]
        matches = [i2 for i2 in range(len(l2)) if l2[i2] == l1[i1]]
        for m in matches:
            run = []
            i_a = 0
            # walk both sequences in lockstep until they differ or one ends
            while (i1 + i_a) < len(l1) and (m + i_a) < len(l2):
                if l1[i1 + i_a] != l2[m + i_a]:
                    break
                run.append(l1[i1 + i_a])
                i_a += 1
            matched_seq.append(run)
    return trimmer(matched_seq, minlen)


def _drop_contained(seqs, order):
    """Visit ``seqs`` in ``order`` and keep those not covered by an earlier keep.

    A sequence counts as covered when every one of its elements occurs in some
    already-kept sequence (``np.isin`` membership, so order and contiguity are
    not considered). An empty sequence is therefore covered by anything.
    """
    kept = []
    for i in order:
        candidate = seqs[i]
        if not np.any([np.all(np.isin(candidate, other)) for other in kept]):
            kept.append(candidate)
    return kept


def trimmer(matched_seq, minlen=1):
    """Remove redundant and too-short sequences from ``matched_seq``.

    Two passes are made: first from longest to shortest, then over the
    survivors from shortest to longest. In each pass a sequence is kept only
    if its elements are not all contained in a sequence kept earlier in that
    pass. Finally sequences shorter than ``minlen`` are dropped.

    Parameters
    ----------
    matched_seq : list of sequences (may be empty).
    minlen : minimum length a sequence must have to be returned.

    Returns
    -------
    list containing the surviving sequence objects, shortest first.
    """
    longest_first = np.argsort([len(seq) for seq in matched_seq])[::-1]
    survivors = _drop_contained(matched_seq, longest_first)
    shortest_first = np.argsort([len(seq) for seq in survivors])
    survivors = _drop_contained(survivors, shortest_first)
    return [seq for seq in survivors if len(seq) >= minlen]
# Per-subject analysis of undo episodes. For each subject and puzzle it takes
# the path before the first undo of an episode (bf), the path at the last undo
# of the episode (im) and the path reached afterwards (af), and collects:
#   * counts of common runs between af and bf by run length (overlap_seq_*),
#   * the change in tortuosity ratio d_zigzag / d_straight (undo_zigzag*),
#   * the per-subject Pearson correlation between tortuosity change and
#     currMas change (corr_).
# One row is appended per subject to each accumulator below.
undo_zigzag = []
undo_zigzag_diff =[]
undo_zigzag_diff_inlen =[]
overlap_seq_inorder = []
overlap_seq_inorder_inlen=[]
# every overlap length seen, pooled over subjects and puzzles
length_seq = []
# one Pearson r per subject (only when pearsonr does not raise)
corr_ = []
# subject ids 0..99 are hard-coded here
for sub in range(100):
dat_sbj = sc_data_choice_level[sc_data_choice_level['subjects']==sub].sort_values(["puzzleID","index"])
# per-puzzle accumulators for the current subject
undo_zigzag_puzzle = []
undo_zigzag_diff_for_puzzle = []
undo_zigzag_diff_inlen_for_puzzle = []
overlap_seq_inorder_for_puzzle = []
overlap_seq_inorder_inlen_for_puzzle=[]
corr_puzzle_ =[]
for pzi in np.unique(sc_data_choice_level['puzzleID']):
dat_sbj_pzi = dat_sbj[dat_sbj['puzzleID'] == pzi].reset_index()
# positional row indices (after reset_index) of the flagged rows
firstUndo_idx = dat_sbj_pzi[dat_sbj_pzi["firstUndo"]==1].index
lastUndo_idx = dat_sbj_pzi[dat_sbj_pzi["lastUndo"]==1].index
# last row of this puzzle
# NOTE(review): pd.Int64Index was deprecated in pandas 1.4 and removed in 2.0;
# this line needs pd.Index([...]) on current pandas.
submit_idx = pd.Int64Index([len(dat_sbj_pzi)-1])
if len(firstUndo_idx)>0:
# The two branches differ only in how seq_af is selected: with a single undo
# episode it is the last row; otherwise it is the row just before each later
# firstUndo plus the last row.
if len(firstUndo_idx)==1: #
seq_bf = dat_sbj_pzi["path"][firstUndo_idx-1]
seq_af = dat_sbj_pzi["path"][submit_idx]
seq_im = dat_sbj_pzi["path"][lastUndo_idx]
currmas_bf = dat_sbj_pzi["currMas"][lastUndo_idx-1]
currmas_af = dat_sbj_pzi["currMas"][lastUndo_idx+1]
else:
seq_bf = dat_sbj_pzi["path"][firstUndo_idx-1]
t_idx = (firstUndo_idx[1:]-1).to_list()
t_idx.append(submit_idx.item())
seq_af = dat_sbj_pzi["path"][t_idx]
seq_im = dat_sbj_pzi["path"][lastUndo_idx]
currmas_bf = dat_sbj_pzi["currMas"][lastUndo_idx-1]
currmas_af = dat_sbj_pzi["currMas"][lastUndo_idx+1]
# The "path" column holds strings such as "[1, 2, 3]"; each is parsed into a
# list of ints (pathList_*) and re-joined with spaces (pathStr_*).
seq_im = seq_im.reset_index()
pathStr_im = [seq_im.loc[i].path.strip('[').strip(']') for i in range(len(seq_im))]
pathList_im = [[int(i) for i in pathStr_im[j].split(', ')] for j in range(len(pathStr_im))]
pathStr_im = [" ".join([str(a) for a in pathList_im[j]]) for j in range(len(seq_im))]
seq_bf = seq_bf.reset_index()
pathStr_bf = [seq_bf.loc[i].path.strip('[').strip(']') for i in range(len(seq_bf))]
pathList_bf = [[int(i) for i in pathStr_bf[j].split(', ')] for j in range(len(pathStr_bf))]
pathStr_bf = [" ".join([str(a) for a in pathList_bf[j]]) for j in range(len(seq_bf))]
seq_af = seq_af.reset_index()
pathStr_af = [seq_af.loc[i].path.strip('[').strip(']') for i in range(len(seq_af))]
pathList_af = [[int(i) for i in pathStr_af[j].split(', ')] for j in range(len(pathStr_af))]
pathStr_af = [" ".join([str(a) for a in pathList_af[j]]) for j in range(len(seq_af))]
# Cities of bf / af that are not in the intermediate path im.
# NOTE(review): np.setdiff1d returns the values sorted and de-duplicated, so
# pathList_bfim / pathList_afim are ordered by city id, not by visiting order.
# The tortuosity and overlap computations below then run on that sorted
# order -- confirm this is intended.
pathList_bfim = [np.setdiff1d(np.array(pathList_bf[i]),np.array(pathList_im[i])).tolist() for i in range(len(pathList_bf))]
pathStr_bfim = [" ".join([str(a) for a in pathList_bfim[j]]) for j in range(len(pathList_im))]
pathList_afim = [np.setdiff1d(np.array(pathList_af[i]),np.array(pathList_im[i])).tolist() for i in range(len(pathList_af))]
pathStr_afim = [" ".join([str(a) for a in pathList_afim[j]]) for j in range(len(pathList_im))]
# tortuosity ratio = travelled length / straight-line length, before and after
tor_bf = get_tortuosity(pathList_bfim, pzi)
path_bf_undo = (tor_bf[:,0]/tor_bf[:,1])
tor_af = get_tortuosity(pathList_afim, pzi)
path_af_undo = (tor_af[:,0]/tor_af[:,1])
zigzag_diff = np.array(path_af_undo) - np.array(path_bf_undo)
currmas_diff = np.array(currmas_af) - np.array(currmas_bf)
# debugging aid: report when the two difference vectors disagree in length
if len(zigzag_diff) != len(currmas_diff):
print(zigzag_diff, currmas_diff)
corr_puzzle_.append([zigzag_diff, currmas_diff])
overlap_seq_2 = []
zigzag_diff_2 = []
seq_inorder_2 = []
# For the i-th "after" path, gather the common runs with every "before" path
# up to and including the i-th, then de-duplicate them with trimmer.
for i in range(len(seq_af)):
temp = []
for j in range(i+1):
# temp.append(get_overlaplist(pathList_afim[i], pathList_bfim[j]))
temptemp = get_overlaplist(pathList_afim[i], pathList_bfim[j])
if not len(temptemp)==0:
temp.extend(temptemp)
temp = trimmer(temp)
# an empty placeholder run stands for "no overlap at all" (length 0)
if len(temp)==0:
temp.append([])
seq_inorder_2.append(temp)
# len_seq_inorder_2 = [len(s) for s in temp if len(s)!=0]
len_seq_inorder_2 =[]
tempzigd = []
# record the length of every run, paired with this comparison's zigzag_diff
for s in temp:
# print('*')
if len(s)==0:
len_seq_inorder_2.append(0)
else:
len_seq_inorder_2.append(len(s))
tempzigd.append(zigzag_diff[i])
overlap_seq_2.extend(len_seq_inorder_2)
zigzag_diff_2.extend(tempzigd)
# I think it doesn't mean it choose different path, it means at least once the chosen city is different after undo
if np.any(np.array(dat_sbj_pzi["choice"][lastUndo_idx-1]) != np.array(dat_sbj_pzi["choice"][lastUndo_idx+1])):
idxx = np.array(dat_sbj_pzi["choice"][lastUndo_idx-1]) != np.array(dat_sbj_pzi["choice"][lastUndo_idx+1])
undo_zigzag_puzzle.extend(np.sign(np.array(path_af_undo[idxx]) - np.array(path_bf_undo[idxx])))
# counts per puzzle: [no overlap, any overlap] and [0, 1, 2, >2] run lengths
overlap_seq_inorder_for_puzzle.append([np.sum(np.array(overlap_seq_2)==0), np.sum(np.array(overlap_seq_2)!=0)])
overlap_seq_inorder_inlen_for_puzzle.append([np.sum(np.array(overlap_seq_2)==0), np.sum(np.array(overlap_seq_2)==1),
np.sum(np.array(overlap_seq_2)==2), np.sum(np.array(overlap_seq_2)>2),])
length_seq.extend(overlap_seq_2)
# summed zigzag_diff per puzzle, split by the same run-length categories
undo_zigzag_diff_for_puzzle.append([np.sum(np.array(zigzag_diff_2)[np.array(overlap_seq_2)==0]) , np.sum(np.array(zigzag_diff_2)[np.array(overlap_seq_2)!=0]) ] )
undo_zigzag_diff_inlen_for_puzzle.append([np.sum(np.array(zigzag_diff_2)[np.array(overlap_seq_2)==0]), np.sum(np.array(zigzag_diff_2)[np.array(overlap_seq_2)==1]),
np.sum(np.array(zigzag_diff_2)[np.array(overlap_seq_2)==2]), np.sum(np.array(zigzag_diff_2)[np.array(overlap_seq_2)>2]),])
# else:
# overlap_seq_inorder_for_puzzle.append([np.nan,np.nan])
# overlap_seq_inorder_inlen_for_puzzle.append([np.nan,np.nan,np.nan,np.nan])
# Collapse the per-puzzle rows into one row per subject (NaN-ignoring sum);
# a subject with no rows contributes a row of zeros.
overlap_seq_inorder_for_puzzle = np.array(overlap_seq_inorder_for_puzzle)
temp = overlap_seq_inorder_for_puzzle.copy()
# overlap_seq_inorder.append(np.nansum(temp,axis=0))
# print('*'*10)
if len(temp) == 0:
overlap_seq_inorder.append(np.zeros((2)))
else:
overlap_seq_inorder.append(np.nansum(temp,axis=0))
overlap_seq_inorder_inlen_for_puzzle=np.array(overlap_seq_inorder_inlen_for_puzzle)
temp = overlap_seq_inorder_inlen_for_puzzle.copy()
if len(temp) == 0:
overlap_seq_inorder_inlen.append(np.zeros((4)))
else:
overlap_seq_inorder_inlen.append(np.nansum(temp,axis=0))
undo_zigzag_puzzle = np.array(undo_zigzag_puzzle)
# undo_zigzag.append([np.sum(undo_zigzag_puzzle<0), np.sum(undo_zigzag_puzzle==0) ,np.sum(undo_zigzag_puzzle>0)])
# number of comparisons where the tortuosity ratio went down / went up
undo_zigzag.append([np.sum(undo_zigzag_puzzle<0) ,np.sum(undo_zigzag_puzzle>0)])
undo_zigzag_diff_for_puzzle=np.array(undo_zigzag_diff_for_puzzle)
temp = undo_zigzag_diff_for_puzzle.copy()
if len(temp) == 0:
undo_zigzag_diff.append(np.zeros((2)))
else:
undo_zigzag_diff.append(np.nansum(temp,axis=0))
undo_zigzag_diff_inlen_for_puzzle=np.array(undo_zigzag_diff_inlen_for_puzzle)
temp = undo_zigzag_diff_inlen_for_puzzle.copy()
if len(temp) == 0:
undo_zigzag_diff_inlen.append(np.zeros((4)))
else:
undo_zigzag_diff_inlen.append(np.nansum(temp,axis=0))
# Pool the difference vectors gathered in corr_puzzle_ and correlate
# tortuosity change with currMas change over the entries where both are
# defined.
c_0 = []
c_1 = []
for c in corr_puzzle_:
c_0.extend(c[0] )
c_1.extend(c[1] )
from scipy.stats import pearsonr
index = np.intersect1d(np.where(~np.isnan(c_0))[0], np.where(~np.isnan(c_1))[0])
# NOTE(review): the bare except silently skips the subject on any error, not
# only when pearsonr has too few points -- consider catching ValueError only.
try:
r,p=pearsonr(np.array(c_0)[index], np.array(c_1)[index])
corr_.append(r)
except:
''
# convert the per-subject accumulators to arrays for the analyses that follow
undo_zigzag = np.array(undo_zigzag)
undo_zigzag_diff = np.array(undo_zigzag_diff)
undo_zigzag_diff_inlen = np.array(undo_zigzag_diff_inlen)
overlap_seq_inorder = np.array(overlap_seq_inorder)
overlap_seq_inorder_inlen = np.array(overlap_seq_inorder_inlen)
/Users/dongjaekim/opt/anaconda3/envs/base37/lib/python3.7/site-packages/scipy/stats/stats.py:4023: PearsonRConstantInputWarning: An input array is constant; the correlation coefficient is not defined. warnings.warn(PearsonRConstantInputWarning())
# Mean of the per-subject correlations, with NaN entries removed first.
np.nanmean(np.array(corr_)[~np.isnan(corr_)])
0.22923986038665958
# Test whether the mean per-subject correlation differs from zero.
# Fix: the original ran a two-sample ttest_ind against an all-zero array of
# the same length. That gives the same t statistic as a one-sample test but
# uses 2n-2 instead of n-1 degrees of freedom, so the p-value was too small.
# A one-sample t-test against 0 is the matching test.
data1 = np.array(corr_)[np.where(~np.isnan(corr_))[0]]
stats.ttest_1samp(data1, 0)
Ttest_indResult(statistic=5.464790186633249, pvalue=2.0233842347730814e-07)
# Build "minimum overlap length 2" variants of the overlap counts and turn every
# count table into per-row proportions.
from scipy.stats import pearsonr
# Flatten the first and second element of every entry of corr_puzzle_ into two long lists.
# c_0 / c_1 are not read again within this cell.
c_0 = []
c_1 = []
for c in corr_puzzle_:
c_0.extend(c[0] )
c_1.extend(c[1] )
# Drop column 1 of the per-length table: np.setdiff1d(range(ncols), 1) is every column index except 1.
overlap_seq_inorder_inlen_minlen2 = overlap_seq_inorder_inlen.copy()
overlap_seq_inorder_inlen_minlen2 = overlap_seq_inorder_inlen_minlen2[:,np.setdiff1d(range(overlap_seq_inorder_inlen_minlen2.shape[1]),1)]
# Collapse to two columns: column 0, and the sum of all remaining columns.
overlap_seq_inorder_minlen2 = overlap_seq_inorder_inlen_minlen2.copy()
overlap_seq_inorder_minlen2 = np.array([overlap_seq_inorder_minlen2[:,0], np.sum(overlap_seq_inorder_minlen2[:,1:],axis=1) ]).transpose()
# Row-normalise each table. A row whose sum is 0 divides 0/0 and becomes NaN
# (NumPy reports this as "invalid value encountered in true_divide").
overlap_seq_inorder_p = overlap_seq_inorder/np.sum(overlap_seq_inorder,axis=1)[:,None]
overlap_seq_inorder_inlen_p = overlap_seq_inorder_inlen/np.sum(overlap_seq_inorder_inlen,axis=1)[:,None]
overlap_seq_inorder_minlen2_p = overlap_seq_inorder_minlen2/np.sum(overlap_seq_inorder_minlen2,axis=1)[:,None]
overlap_seq_inorder_inlen_minlen2_p = overlap_seq_inorder_inlen_minlen2/np.sum(overlap_seq_inorder_inlen_minlen2,axis=1)[:,None]
/Users/dongjaekim/opt/anaconda3/envs/base37/lib/python3.7/site-packages/ipykernel_launcher.py:6: RuntimeWarning: invalid value encountered in true_divide /Users/dongjaekim/opt/anaconda3/envs/base37/lib/python3.7/site-packages/ipykernel_launcher.py:7: RuntimeWarning: invalid value encountered in true_divide import sys /Users/dongjaekim/opt/anaconda3/envs/base37/lib/python3.7/site-packages/ipykernel_launcher.py:10: RuntimeWarning: invalid value encountered in true_divide # Remove the CWD from sys.path while we load stuff. /Users/dongjaekim/opt/anaconda3/envs/base37/lib/python3.7/site-packages/ipykernel_launcher.py:11: RuntimeWarning: invalid value encountered in true_divide # This is added back by InteractiveShellApp.init_path()
# Same reshaping as for the overlap tables, applied to the zigzag-difference counts.
# Drop column 1 of the per-length table (keep every column index except 1).
undo_zigzag_diff_inlen_minlen2 = undo_zigzag_diff_inlen.copy()
undo_zigzag_diff_inlen_minlen2 = undo_zigzag_diff_inlen_minlen2[:,np.setdiff1d(range(undo_zigzag_diff_inlen_minlen2.shape[1]),1)]
# Two-column summary: column 0 versus the sum of the remaining columns.
undo_zigzag_diff_minlen2 = undo_zigzag_diff_inlen_minlen2.copy()
undo_zigzag_diff_minlen2 = np.array([undo_zigzag_diff_minlen2[:,0], np.sum(undo_zigzag_diff_minlen2[:,1:],axis=1) ]).transpose()
# Row-normalise to proportions; rows that sum to 0 become NaN.
undo_zigzag_diff_minlen2_p = undo_zigzag_diff_minlen2/np.sum(undo_zigzag_diff_minlen2,axis=1)[:,None]
undo_zigzag_diff_inlen_minlen2_p = undo_zigzag_diff_inlen_minlen2/np.sum(undo_zigzag_diff_inlen_minlen2,axis=1)[:,None]
/Users/dongjaekim/opt/anaconda3/envs/base37/lib/python3.7/site-packages/ipykernel_launcher.py:6: RuntimeWarning: invalid value encountered in true_divide /Users/dongjaekim/opt/anaconda3/envs/base37/lib/python3.7/site-packages/ipykernel_launcher.py:7: RuntimeWarning: invalid value encountered in true_divide import sys
# exclude some never undoing subjects
# Rows of undo_zigzag whose sum is 0 are removed. Indexing with the tuple returned by
# np.where adds a leading axis of length 1, which the squeeze() on the next line removes.
undo_zigzag = undo_zigzag[np.where(np.sum(np.array(undo_zigzag),axis=1)!=0),:]
undo_zigzag = undo_zigzag.squeeze()
# Row-wise proportions of the filtered table (not used by the test below).
undo_zigzag_p = undo_zigzag/ np.sum(undo_zigzag,axis = 1)[:,None]
# Paired Wilcoxon signed-rank test between the two columns of the two-column proportion table.
# NOTE(review): this table is not filtered above and may contain NaN rows (rows that summed
# to 0) - confirm how the installed SciPy version treats NaN pairs.
stat12, p12 = wilcoxon(undo_zigzag_diff_minlen2_p[:,0],undo_zigzag_diff_minlen2_p[:,1])
print(stat12)
print(p12)
666.0 1.6278452784027353e-10
# Bar plot of the mean (NaN-ignoring) of the two proportion columns, annotated with the
# p-value p12 computed in the preceding cell, and saved as a PNG under out_dir.
%matplotlib notebook
mpl.rcParams['font.family'] = 'Arial'
mpl.rcParams['font.size'] = 11
fig = plt.figure(figsize=(6,4.5))
# Error bars: NaN-ignoring standard deviation divided by sqrt(number of rows).
# NOTE(review): shape[0] counts every row, including rows that are NaN and therefore
# excluded from nanmean/nanstd - confirm this is the intended n for the standard error.
bb = plt.bar(range(2), np.nanmean(undo_zigzag_diff_minlen2_p,axis=0),
color=[.7,.7,.7], edgecolor = 'k',
yerr=np.nanstd(undo_zigzag_diff_minlen2_p,axis = 0)/np.sqrt(undo_zigzag_diff_minlen2_p.shape[0]))
plt.xticks([0,1], ['totally different','has some cities that overlaps'])
plt.xlabel('zigzagness difference between child cities of branching city af and bf undo')
#statistics
# Significance bracket between bar 0 and bar 1, drawn 0.8 above the taller bar with tick height h.
x1, x2 = 0,1
y, h, col = np.max([bb[0].get_height(),bb[1].get_height()]) + 0.8, 0.02, 'k'
plt.plot([x1, x1, x2, x2], [y, y+h, y+h, y], lw=1.5, c=col)
plt.text((x1+x2)*.5, y+h, r"$p = {:f}$".format(p12), ha='center', va='bottom', color=col, fontsize = 8)
fig.savefig(out_dir + 'proportion_zigznagness_different_overlaps_len2.png', dpi=600, bbox_inches='tight')
# Paired Wilcoxon signed-rank tests between adjacent columns (0 vs 1, 1 vs 2) of the
# per-length proportion table. p12 is overwritten here and reused by the plot that follows.
stat12, p12 = wilcoxon(undo_zigzag_diff_inlen_minlen2_p[:,0],undo_zigzag_diff_inlen_minlen2_p[:,1])
stat23, p23 = wilcoxon(undo_zigzag_diff_inlen_minlen2_p[:,1],undo_zigzag_diff_inlen_minlen2_p[:,2])
print(stat12)
print(p12)
print(stat23)
print(p23)
1132.0 2.76521429315202e-06 818.0 9.607948130467323e-08
# Three-bar version of the zigzag-difference plot, annotated with p12 and p23
# from the preceding cell and saved as a PNG under out_dir.
%matplotlib notebook
mpl.rcParams['font.family'] = 'Arial'
mpl.rcParams['font.size'] = 11
fig = plt.figure(figsize=(8,6))
# Error bars: NaN-ignoring standard deviation divided by sqrt(number of rows).
# NOTE(review): shape[0] includes NaN rows - confirm the intended n for the standard error.
bb = plt.bar(range(3), np.nanmean(undo_zigzag_diff_inlen_minlen2_p,axis=0),
color=[.7,.7,.7], edgecolor = 'k',
yerr=np.nanstd(undo_zigzag_diff_inlen_minlen2_p,axis = 0)/np.sqrt(undo_zigzag_diff_inlen_minlen2_p.shape[0]))
# plt.xticks(range(3), ['totally \ndifferent','len=2','len>=3'])
# The extra tick at x=1.5 carries only a caption placed between the '2' and '3+' bars.
plt.xticks([0,1,1.5,2], ['totally \ndifferent','2','\nlength of the overlapped sequence','3+'])
plt.xlabel('zigzagness difference between child cities of branching city af and bf undo')
#statistics
# Bracket between bars 0 and 1, labelled with p12.
x1, x2 = 0,1
y, h, col = np.max([bb[0].get_height(),bb[1].get_height()]) + 0.5, 0.02, 'k'
plt.plot([x1, x1, x2, x2], [y, y+h, y+h, y], lw=1.5, c=col)
plt.text((x1+x2)*.5, y+h, r"$p = {:f}$".format(p12), ha='center', va='bottom', color=col, fontsize = 8)
#statistics
# Bracket between bars 1 and 2, labelled with p23.
x1, x2 = 1,2
y, h, col = np.max([bb[1].get_height(),bb[2].get_height()]) + 0.5, 0.02, 'k'
plt.plot([x1, x1, x2, x2], [y, y+h, y+h, y], lw=1.5, c=col)
plt.text((x1+x2)*.5, y+h, r"$p = {:f}$".format(p23), ha='center', va='bottom', color=col, fontsize = 8)
fig.savefig(out_dir + 'proportion_zigznagness_different_overlaps_bylen_len2.png', dpi=600, bbox_inches='tight')
def get_overlap_cts(l1,l2):
    """Collect, for each item of l1 in turn, every element of l2 equal to it.

    Duplicates are kept: an item that occurs a times in l1 and b times in l2
    contributes a*b entries. The result follows the order of l1.
    """
    return [candidate for item in l1 for candidate in l2 if candidate == item]
def get_overlap(s1, s2):
    """Return the longest contiguous stretch of s1 that also appears in s2.

    Relies on difflib.SequenceMatcher.find_longest_match over the full extent
    of both sequences; the result is a slice of s1 (empty when nothing matches).
    """
    matcher = difflib.SequenceMatcher(None, s1, s2)
    longest = matcher.find_longest_match(0, len(s1), 0, len(s2))
    start_in_s1, length = longest[0], longest[2]
    return s1[start_in_s1:start_in_s1 + length]
def get_overlaplist(l1,l2,minlen=1):
    """Find the contiguous runs shared by l1 and l2, dropping runs covered by another run.

    For every pair of positions holding equal items, the run of consecutive
    equal items starting there is recorded. Runs whose items all occur in an
    already kept run are discarded, first scanning from longest to shortest and
    then from shortest to longest. Runs shorter than minlen are removed last.
    """
    def _common_run(start1, start2):
        # Walk both lists in lockstep until the items differ or one list ends.
        run = []
        for a, b in zip(l1[start1:], l2[start2:]):
            if a == b:
                run.append(a)
            else:
                break
        return run

    def _keep_uncontained(candidates, order):
        # Visit candidates in the given order, keeping one only when its items
        # are not all present in some candidate kept earlier.
        kept = []
        for pos in order:
            current = candidates[pos]
            if kept and np.any([np.all(np.isin(current, other)) for other in kept]):
                continue
            kept.append(current)
        return kept

    runs = [
        _common_run(i1, i2)
        for i1 in range(len(l1))
        for i2 in range(len(l2))
        if l2[i2] == l1[i1]
    ]

    longest_first = np.argsort([len(run) for run in runs])[::-1]
    runs = _keep_uncontained(runs, longest_first)
    shortest_first = np.argsort([len(run) for run in runs])
    runs = _keep_uncontained(runs, shortest_first)

    return [run for run in runs if len(run) >= minlen]
def trimmer(matched_seq,minlen=1):
    """Drop sequences whose items are all contained in another kept sequence.

    The candidates are scanned twice, longest-first and then shortest-first;
    on each scan a sequence is kept only if its items are not all present in a
    sequence kept earlier in that scan. Sequences shorter than minlen are
    removed at the end. The input list itself is not modified.
    """
    def _keep_uncontained(candidates, order):
        kept = []
        for pos in order:
            current = candidates[pos]
            if kept and np.any([np.all(np.isin(current, other)) for other in kept]):
                continue
            kept.append(current)
        return kept

    longest_first = np.argsort([len(item) for item in matched_seq])[::-1]
    survivors = _keep_uncontained(matched_seq, longest_first)
    shortest_first = np.argsort([len(item) for item in survivors])
    survivors = _keep_uncontained(survivors, shortest_first)

    return [item for item in survivors if len(item) >= minlen]
['same', 'reordering', 'partial change', 'totally different',
'inserting new cities', 'removed some from prev seq']
# Classify how the cities removed by an undo relate to the cities added afterwards, and count
# the six pattern types per subject (0 same, 1 reordering, 2 partial overlap, 3 totally different,
# 4 inserting new cities, 5 removed some from previous sequence).
# NOTE(review): indentation was lost in this export; the nesting of the statements below is not
# recoverable from the text alone - confirm against the original notebook before editing.
patterns = []
overlap_cts = []
overlap_cts_inlen=[]
overlap_seq_inorder = []
overlap_seq_inorder_inlen=[]
currmas = []
currmas_inlen=[]
endmas = []
endmas_inlen=[]
length_seq = []
for sub in range(100):
dat_sbj = sc_data_choice_level[sc_data_choice_level['subjects']==sub].sort_values(["puzzleID","index"])
overlap_cts_for_puzzle = []
overlap_cts_inlen_for_puzzle=[]
overlap_seq_inorder_for_puzzle = []
overlap_seq_inorder_inlen_for_puzzle=[]
currmas_for_puzzle = []
currmas_inlen_for_puzzle=[]
endmas_for_puzzle = []
endmas_inlen_for_puzzle=[]
# One counter per pattern type, accumulated over this subject's puzzles.
patterns_for_puzzle = np.zeros((6))
for pzi in np.unique(sc_data_choice_level['puzzleID']):
dat_sbj_pzi = dat_sbj[dat_sbj['puzzleID'] == pzi].reset_index()
# Row positions flagged as the first / last step of an undo sequence.
firstUndo_idx = dat_sbj_pzi[dat_sbj_pzi["firstUndo"]==1].index
path_bf_undo = dat_sbj_pzi["currMas"][firstUndo_idx-1] # the mas of the state before undo
lastUndo_idx = dat_sbj_pzi[dat_sbj_pzi["lastUndo"]==1].index
path_af_undo = dat_sbj_pzi["currMas"][lastUndo_idx+1] # the mas of the state after undo
# Index of the last row of this puzzle.
# NOTE(review): pd.Int64Index was removed in pandas 2.0; pd.Index([...]) is the
# version-independent spelling - confirm the pandas version this is meant to run on.
submit_idx = pd.Int64Index([len(dat_sbj_pzi)-1])
if len(firstUndo_idx)>0:
if len(firstUndo_idx)==1: #
# Single undo sequence: compare the path just before it with the path on the last row.
seq_bf = dat_sbj_pzi["path"][firstUndo_idx-1]
seq_af = dat_sbj_pzi["path"][submit_idx]
seq_im = dat_sbj_pzi["path"][lastUndo_idx]
currmas_bf = dat_sbj_pzi["currMas"][lastUndo_idx-1]
currmas_af = dat_sbj_pzi["currMas"][lastUndo_idx+1]
endmas_bf = dat_sbj_pzi["currMas"][firstUndo_idx-1]
endmas_af = dat_sbj_pzi["currMas"][submit_idx]
else:
# Several undo sequences: the "after" path of each one is the row just before the
# next undo sequence starts, and the last row for the final one.
seq_bf = dat_sbj_pzi["path"][firstUndo_idx-1]
t_idx = (firstUndo_idx[1:]-1).to_list()
t_idx.append(submit_idx.item())
seq_af = dat_sbj_pzi["path"][t_idx]
seq_im = dat_sbj_pzi["path"][lastUndo_idx]
currmas_bf = dat_sbj_pzi["currMas"][lastUndo_idx-1]
currmas_af = dat_sbj_pzi["currMas"][lastUndo_idx+1]
endmas_bf = dat_sbj_pzi["currMas"][firstUndo_idx-1]
endmas_af = dat_sbj_pzi["currMas"][t_idx]
# Parse the "path" strings (of the form "[a, b, c]") into lists of ints, and also build
# space-separated string versions.
seq_im = seq_im.reset_index()
pathStr_im = [seq_im.loc[i].path.strip('[').strip(']') for i in range(len(seq_im))]
pathList_im = [[int(i) for i in pathStr_im[j].split(', ')] for j in range(len(pathStr_im))]
pathStr_im = [" ".join([str(a) for a in pathList_im[j]]) for j in range(len(seq_im))]
seq_bf = seq_bf.reset_index()
pathStr_bf = [seq_bf.loc[i].path.strip('[').strip(']') for i in range(len(seq_bf))]
pathList_bf = [[int(i) for i in pathStr_bf[j].split(', ')] for j in range(len(pathStr_bf))]
pathStr_bf = [" ".join([str(a) for a in pathList_bf[j]]) for j in range(len(seq_bf))]
seq_af = seq_af.reset_index()
pathStr_af = [seq_af.loc[i].path.strip('[').strip(']') for i in range(len(seq_af))]
pathList_af = [[int(i) for i in pathStr_af[j].split(', ')] for j in range(len(pathStr_af))]
pathStr_af = [" ".join([str(a) for a in pathList_af[j]]) for j in range(len(seq_af))]
# Cities present in the before/after path but not in the path at the last undo step.
# NOTE(review): np.setdiff1d returns the sorted unique values, so the visiting order of the
# cities is not preserved here. With order lost, the 'reordering' category below presumably
# cannot be told apart from 'same' - confirm whether an order-preserving difference was intended.
pathList_bfim = [np.setdiff1d(np.array(pathList_bf[i]),np.array(pathList_im[i])).tolist() for i in range(len(pathList_bf))]
pathStr_bfim = [" ".join([str(a) for a in pathList_bfim[j]]) for j in range(len(pathList_im))]
pathList_afim = [np.setdiff1d(np.array(pathList_af[i]),np.array(pathList_im[i])).tolist() for i in range(len(pathList_af))]
pathStr_afim = [" ".join([str(a) for a in pathList_afim[j]]) for j in range(len(pathList_im))]
# seq_inorder = [get_overlap(pathStr_af[i], pathStr_bf[i]).strip('0 ').split(' ') for i in range(len(seq_af))]
# seq_inorder = [[] if i[0]=='' else [int(j) for j in i] for i in seq_inorder]
# NOTE(review): cts is not read again in this cell, and both lists are indexed with i even
# though the comprehension also iterates over j - confirm whether pathList_bfim[j] was meant.
cts = [get_overlap_cts(pathList_afim[i], pathList_bfim[i]) for i in range(len(seq_af))for j in range(i+1) ]
# seq_inorder = [[] if i[0]=='' else [int(j) for j in i] for i in seq_inorder]
# Each len_af entry is [number of cities covered by the shared runs, mean shared-run length,
# length of the "after" list, length of the "before" list]. The mean of an empty run list is NaN.
len_af = []
for i in range(len(seq_af)):
temp = []
temp2 = []
for j in range(i+1):
temptemp = get_overlap_cts(pathList_afim[i], pathList_bfim[j])
temptemp2 = get_overlaplist(pathList_afim[i], pathList_bfim[j])
temp_list2 = []
for t_ in temptemp2:
temp_list2.extend(t_)
len_af.append([len(temp_list2) ,np.mean([len(tt) for tt in temptemp2])
,len(pathList_afim[i]) ,len(pathList_bfim[j])])
# same length
if len_af[-1][2]==(len_af[-1][3]):
# exactly same sequence
if len_af[-1][2] == len_af[-1][1]:
# print('same')
patterns_for_puzzle[0]+=1
# not exactly same sequence,
else:
# but has the same cities in the sequence
if len_af[-1][2] == len_af[-1][0]:# same
# print('reordering')
patterns_for_puzzle[1]+=1
# different cities
else:
# no overlapping cities at all
if len_af[-1][0]==0:
# print('totally different')
patterns_for_puzzle[3]+=1
# has some overlapping cities.
else:
# print('partial overlap')
patterns_for_puzzle[2]+=1
else: # different length
# path before is exactly composed of the overlapping cities
# which means that you inserted some in the path after undo.
if len_af[-1][3] == len_af[-1][0]:
# print('inserting new cities')
patterns_for_puzzle[4]+=1
# path after undo is exactly composed of the overlapping cities
# which means removing one city from previous.
elif len_af[-1][2]==len_af[-1][0]:
# print('removed some from prev seq')
patterns_for_puzzle[5]+=1
else:
# if there is nothing in common
if len_af[-1][0]==0:
# print('totally different')
patterns_for_puzzle[3]+=1
else:
# print('partial overlap')
patterns_for_puzzle[2]+=1
# # print('used some of the sequences')
# patterns_for_puzzle[6]+=1
patterns.append(patterns_for_puzzle)
patterns = np.array(patterns)
/Users/dongjaekim/opt/anaconda3/envs/base37/lib/python3.7/site-packages/numpy/core/_methods.py:189: RuntimeWarning: invalid value encountered in double_scalars ret = ret.dtype.type(ret / rcount)
# Row-normalise the pattern counts to proportions; a row that sums to 0 becomes NaN.
patterns_p = np.divide(patterns, np.sum(patterns,axis=1).reshape(-1,1))
/Users/dongjaekim/opt/anaconda3/envs/base37/lib/python3.7/site-packages/ipykernel_launcher.py:1: RuntimeWarning: invalid value encountered in true_divide """Entry point for launching an IPython kernel.
# Column-wise mean proportion of each pattern type, ignoring NaN rows.
np.nanmean(patterns_p,axis=0)
array([0.06960946, 0. , 0.32733827, 0.32092617, 0.20247063,
0.07965548])
# Bar plot of the mean proportion of each of the six pattern types.
%matplotlib notebook
mpl.rcParams['font.family'] = 'Arial'
mpl.rcParams['font.size'] = 11
plt.figure(figsize=(10,6))
# Error bars: NaN-ignoring standard deviation divided by sqrt(number of rows).
# NOTE(review): shape[0] includes NaN rows - confirm the intended n for the standard error.
plt.bar(range(6), np.nanmean(patterns_p,axis=0),
color=[.7,.7,.7], edgecolor = 'k',
yerr=np.nanstd(patterns_p,axis = 0)/np.sqrt(patterns_p.shape[0]))
# Tick labels follow the counter order used when patterns was filled (index 0..5).
plt.xticks(range(6), ['same', 'reordering\n(exactly same cities\nbut different order)', 'partial\noverlap',
'totally\ndifferent\n(no overlap at all)',
'inserting\nnew cities', 'removed some\nfrom prev seq'])
([<matplotlib.axis.XTick at 0x7fed4a71d7d0>, <matplotlib.axis.XTick at 0x7fed4a71d390>, <matplotlib.axis.XTick at 0x7fed2bbf7c50>, <matplotlib.axis.XTick at 0x7fed4a71b290>, <matplotlib.axis.XTick at 0x7fed4a71b3d0>, <matplotlib.axis.XTick at 0x7fed08ab5a50>], [Text(0, 0, 'same'), Text(1, 0, 'reordering\n(exactly same cities\nbut different order)'), Text(2, 0, 'partial\noverlap'), Text(3, 0, 'totally\ndifferent\n(no overlap at all)'), Text(4, 0, 'inserting\nnew cities'), Text(5, 0, 'removed some\nfrom prev seq')])
import copy
def get_tortuosity(pathList, pzi = 15, basic_map=None):
    """Compute travelled and straight-line distance for each path on one puzzle map.

    Parameters
    ----------
    pathList : sequence of sequences of city indices. It is only read, never
        modified.
    pzi : key/index of the puzzle inside the map collection.
    basic_map : optional, already loaded map collection. ``basic_map[pzi]['xy']``
        must be indexable by city index and yield that city's coordinates.
        When omitted, './util/basicMap.json' is loaded, as before. Passing the
        loaded map avoids re-reading and re-parsing the file on every call.

    Returns
    -------
    numpy.ndarray of shape (len(pathList), 2). Each row is
    ``[d_zigzag, d_straight]``: the summed length of the consecutive segments
    of the path, and the distance between its first and last city. Paths with
    fewer than two cities give ``[nan, nan]``. An empty ``pathList`` gives an
    array of shape (0, 2) so column slicing still works.
    """
    if basic_map is None:
        import json
        with open('./util/basicMap.json','rb') as f:
            basic_map = json.load(f)
    xy = basic_map[pzi]['xy']

    def cal_dist(xy1, xy2):
        # Euclidean distance between two coordinate vectors.
        return np.sqrt(np.sum((np.array(xy1)-np.array(xy2))**2))

    tortuosity = []
    for path in pathList:
        if len(path) > 1:
            d_straight = cal_dist(xy[path[0]], xy[path[-1]])
            d_zigzag = 0
            # Add up the segments between consecutive cities, in visiting order.
            for ct_init, ct_tgt in zip(path[:-1], path[1:]):
                d_zigzag += cal_dist(xy[ct_init], xy[ct_tgt])
            tortuosity.append([d_zigzag, d_straight])
        else:
            # A path with fewer than two cities has no defined distances.
            tortuosity.append([np.nan, np.nan])
    return np.array(tortuosity, dtype=float).reshape(-1, 2)
import difflib
def get_overlap(s1, s2):
    """Return the longest contiguous stretch of s1 that also appears in s2.

    Uses difflib.SequenceMatcher.find_longest_match over the whole of both
    sequences; the returned value is a slice of s1 (empty when nothing matches).
    """
    block = difflib.SequenceMatcher(None, s1, s2).find_longest_match(
        0, len(s1), 0, len(s2)
    )
    begin, span = block[0], block[2]
    return s1[begin:begin + span]
def get_overlaplist(l1,l2,minlen=1):
    """Find the contiguous runs shared by l1 and l2, dropping runs covered by another run.

    Every pair of positions with equal items starts a run of consecutive equal
    items. A run is discarded when all of its items occur in a run already
    kept; this filter runs longest-first and then shortest-first. Finally,
    runs shorter than minlen are removed.
    """
    def _shared_prefix(tail1, tail2):
        # Longest common prefix of the two tails.
        prefix = []
        for left, right in zip(tail1, tail2):
            if left == right:
                prefix.append(left)
            else:
                break
        return prefix

    def _filter_contained(pool, visit_order):
        # Keep a sequence only if no previously kept sequence holds all its items.
        chosen = []
        for k in visit_order:
            seq = pool[k]
            if chosen and np.any([np.all(np.isin(seq, prev)) for prev in chosen]):
                continue
            chosen.append(seq)
        return chosen

    found = []
    for i1 in range(len(l1)):
        for i2 in range(len(l2)):
            if l2[i2] == l1[i1]:
                found.append(_shared_prefix(l1[i1:], l2[i2:]))

    found = _filter_contained(found, np.argsort([len(s) for s in found])[::-1])
    found = _filter_contained(found, np.argsort([len(s) for s in found]))

    return [s for s in found if len(s) >= minlen]
def trimmer(matched_seq,minlen=1):
    """Drop sequences whose items are all contained in another kept sequence.

    Two scans are made, longest-first and then shortest-first. On each scan a
    sequence survives only if its items are not all present in a sequence that
    survived earlier in the same scan. Sequences shorter than minlen are then
    removed. The caller's list is left untouched.
    """
    def _filter_contained(pool, visit_order):
        chosen = []
        for k in visit_order:
            seq = pool[k]
            if chosen and np.any([np.all(np.isin(seq, prev)) for prev in chosen]):
                continue
            chosen.append(seq)
        return chosen

    remaining = _filter_contained(
        matched_seq, np.argsort([len(s) for s in matched_seq])[::-1]
    )
    remaining = _filter_contained(
        remaining, np.argsort([len(s) for s in remaining])
    )
    return [s for s in remaining if len(s) >= minlen]
# Compute path tortuosity (d_zigzag / d_straight from get_tortuosity) per subject and puzzle,
# separately for condition==1 rows ("undo") and condition==0 rows ("basic").
# NOTE(review): indentation was lost in this export; in particular it is not recoverable from the
# text whether the tortuosity statements sit inside the `if len(firstUndo_idx)>0` branch - confirm
# against the original notebook before editing.
undo_level = data_choice_level[data_choice_level['condition']==1].copy().reset_index()
basic_level = data_choice_level[data_choice_level['condition']==0].copy().reset_index()
undo_zigzag = []
basic_zigzag = []
zigzag_undo_all = []
zigzag_basic_all = []
zigzag_1st_lst = []
for sub in range(100):
# dat_sbj = sc_data_choice_level[sc_data_choice_level['subjects']==sub].sort_values(["puzzleID","index"])
dat_sbj_undo = undo_level[undo_level['subjects']==sub].sort_values(["puzzleID","index"])
dat_sbj_basic = basic_level[basic_level['subjects']==sub].sort_values(["puzzleID","index"])
undo_zigzag_puzzle = []
basic_zigzag_puzzle = []
zigzag_1st_lst_puzzle = []
for pzi in np.unique(sc_data_choice_level['puzzleID']):
dat_sbj_undo_pzi = dat_sbj_undo[dat_sbj_undo['puzzleID'] == pzi].reset_index()
dat_sbj_basic_pzi = dat_sbj_basic[dat_sbj_basic['puzzleID'] == pzi].reset_index()
# Row positions flagged as the first / last step of an undo sequence.
firstUndo_idx = dat_sbj_undo_pzi[dat_sbj_undo_pzi["firstUndo"]==1].index
lastUndo_idx = dat_sbj_undo_pzi[dat_sbj_undo_pzi["lastUndo"]==1].index
# Index of the last row of this puzzle in the undo condition.
# NOTE(review): pd.Int64Index was removed in pandas 2.0; pd.Index([...]) is the
# version-independent spelling - confirm the pandas version this is meant to run on.
submit_idx = pd.Int64Index([len(dat_sbj_undo_pzi)-1])
if len(firstUndo_idx)>0:
if len(firstUndo_idx)==1: #
seq_bf = dat_sbj_undo_pzi["path"][firstUndo_idx-1]
seq_af = dat_sbj_undo_pzi["path"][submit_idx]
seq_im = dat_sbj_undo_pzi["path"][lastUndo_idx]
else:
# Several undo sequences: the "after" path of each one is the row just before the
# next undo sequence starts, and the last row for the final one.
seq_bf = dat_sbj_undo_pzi["path"][firstUndo_idx-1]
t_idx = (firstUndo_idx[1:]-1).to_list()
t_idx.append(submit_idx.item())
seq_af = dat_sbj_undo_pzi["path"][t_idx]
seq_im = dat_sbj_undo_pzi["path"][lastUndo_idx]
# Parse the "path" strings (of the form "[a, b, c]") into lists of ints.
seq_im = seq_im.reset_index()
pathStr_im = [seq_im.loc[i].path.strip('[').strip(']') for i in range(len(seq_im))]
pathList_im = [[int(i) for i in pathStr_im[j].split(', ')] for j in range(len(pathStr_im))]
pathStr_im = [" ".join([str(a) for a in pathList_im[j]]) for j in range(len(seq_im))]
seq_bf = seq_bf.reset_index()
pathStr_bf = [seq_bf.loc[i].path.strip('[').strip(']') for i in range(len(seq_bf))]
pathList_bf = [[int(i) for i in pathStr_bf[j].split(', ')] for j in range(len(pathStr_bf))]
pathStr_bf = [" ".join([str(a) for a in pathList_bf[j]]) for j in range(len(seq_bf))]
seq_af = seq_af.reset_index()
pathStr_af = [seq_af.loc[i].path.strip('[').strip(']') for i in range(len(seq_af))]
pathList_af = [[int(i) for i in pathStr_af[j].split(', ')] for j in range(len(pathStr_af))]
pathStr_af = [" ".join([str(a) for a in pathList_af[j]]) for j in range(len(seq_af))]
# The *_bfim / *_afim differences are computed but not read again in this cell.
pathList_bfim = [np.setdiff1d(np.array(pathList_bf[i]),np.array(pathList_im[i])).tolist() for i in range(len(pathList_bf))]
pathStr_bfim = [" ".join([str(a) for a in pathList_bfim[j]]) for j in range(len(pathList_im))]
pathList_afim = [np.setdiff1d(np.array(pathList_af[i]),np.array(pathList_im[i])).tolist() for i in range(len(pathList_af))]
pathStr_afim = [" ".join([str(a) for a in pathList_afim[j]]) for j in range(len(pathList_im))]
# Basic condition: tortuosity ratio of the path on the last row only.
seq_basic = dat_sbj_basic_pzi['path'].reset_index()
pathStr_basic = [seq_basic.loc[i].path.strip('[').strip(']') for i in range(len(seq_basic))]
pathList_basic = [[int(i) for i in pathStr_basic[j].split(', ')] for j in range(len(pathStr_basic))]
tor_basic = get_tortuosity([pathList_basic[-1]],pzi)
path_basic = (tor_basic[:,0]/tor_basic[:,1])
zigzag_basic = path_basic.tolist()
# Undo condition: tortuosity ratio of every "before" and "after" path.
tor_bf = get_tortuosity(pathList_bf, pzi)
path_bf_undo = (tor_bf[:,0]/tor_bf[:,1])
tor_af = get_tortuosity(pathList_af, pzi)
path_af_undo = (tor_af[:,0]/tor_af[:,1])
# NOTE(review): the list is all "after" ratios followed by the LAST "before" ratio, so
# zigzag_undo[0] is the first "after" path and zigzag_undo[-1] is a "before" path. The plot
# further down labels these columns 'The first path' and 'The last path' - confirm that this
# ordering is what was intended.
zigzag_undo = [*path_af_undo.tolist(), path_bf_undo[-1]]
# Pair every ratio with its position in zigzag_undo.
zz = []
for count, z in enumerate(zigzag_undo):
zz.append([count,z])
zigzag_undo_all.extend(zz)
zigzag_basic_all.extend(zigzag_basic)
# undo_zigzag_puzzle.append(np.mean(zigzag_undo[0]))
undo_zigzag_puzzle.append(np.mean(zigzag_undo))
basic_zigzag_puzzle.append(zigzag_basic[0])
# Columns: basic ratio, first undo ratio, mean undo ratio, last undo ratio.
zigzag_1st_lst_puzzle.append([zigzag_basic[0], zigzag_undo[0], np.mean(zigzag_undo), zigzag_undo[-1]])
undo_zigzag_puzzle = np.array(undo_zigzag_puzzle)
basic_zigzag_puzzle = np.array(basic_zigzag_puzzle)
zigzag_1st_lst_puzzle = np.array(zigzag_1st_lst_puzzle)
undo_zigzag.append(undo_zigzag_puzzle)
basic_zigzag.append(basic_zigzag_puzzle)
zigzag_1st_lst.append(zigzag_1st_lst_puzzle)
undo_zigzag = np.array(undo_zigzag)
basic_zigzag = np.array(basic_zigzag)
zigzag_1st_lst = np.array(zigzag_1st_lst)
# Per-subject means (axis=1 averages over puzzles), then two-sample t-tests between the basic
# and undo tortuosity. Both tests compare the same quantities: columns 0 and 2 of
# zigzag_1st_lst hold the basic ratio and the mean undo ratio.
# NOTE(review): both samples are built in the same `for sub` loop, i.e. from the same
# subjects; a paired test (ttest_rel is already imported) may be more appropriate than
# ttest_ind - confirm.
bu=np.array([np.mean(basic_zigzag,axis=1),np.mean(undo_zigzag,axis=1)])
zigzag_1st_lst_mean = np.mean(zigzag_1st_lst,axis=1)
stat,p= ttest_ind(np.mean(basic_zigzag,axis=1),np.mean(undo_zigzag,axis=1))
print(p)
stat,p= ttest_ind(zigzag_1st_lst_mean[:,0],zigzag_1st_lst_mean[:,2])
print(p)
0.8998599339579605 0.8998599339579652
# Paired Wilcoxon signed-rank versions of the two comparisons above (basic vs undo tortuosity).
stat,p= wilcoxon(np.mean(basic_zigzag,axis=1),np.mean(undo_zigzag,axis=1))
print(p)
stat,p= wilcoxon(zigzag_1st_lst_mean[:,0],zigzag_1st_lst_mean[:,2])
print(p)
0.7010287235625708 0.7010287235625708
# Bar plot of the mean of the four columns of zigzag_1st_lst_mean
# (basic ratio, first undo ratio, mean undo ratio, last undo ratio).
%matplotlib inline
mpl.rcParams['font.family'] = 'Arial'
mpl.rcParams['font.size'] = 11
plt.figure(figsize=(10,4.5))
# Error bars: NaN-ignoring standard deviation divided by sqrt(number of rows).
# NOTE(review): shape[0] includes NaN rows, if any - confirm the intended n for the standard error.
bb = plt.bar(range(4), np.nanmean(zigzag_1st_lst_mean,axis=0),
color=[.7,.7,.7], edgecolor = 'k',
yerr=np.nanstd(zigzag_1st_lst_mean,axis = 0)/np.sqrt(zigzag_1st_lst_mean.shape[0]))
plt.xticks(range(4), ['Path \nin without undo','The first path','Avg. of paths\nin with undo','The last path'])
plt.ylabel('tortuosity')
Text(0, 0.5, 'tortuosity')
# Two-bar summary using only columns 0 (basic ratio) and 2 (mean undo ratio).
# No plt.figure() call is made in this cell, unlike the other plotting cells.
%matplotlib inline
mpl.rcParams['font.family'] = 'Arial'
mpl.rcParams['font.size'] = 11
# Error bars: NaN-ignoring standard deviation divided by sqrt(number of rows).
bb = plt.bar(range(2), np.nanmean(zigzag_1st_lst_mean[:,[0,2]],axis=0),
color=[.7,.7,.7], edgecolor = 'k',
yerr=np.nanstd(zigzag_1st_lst_mean[:,[0,2]],axis = 0)/np.sqrt(zigzag_1st_lst_mean.shape[0]))
plt.xticks(range(2), ['Paths \nin without undo','Paths \nin with undo'])
plt.ylabel('tortuosity')
plt.title('No difference!')
Text(0.5, 1.0, 'No difference!')
import difflib
def get_overlaplist(l1,l2):
    """Collect, for each item of l1 in turn, every element of l2 equal to it.

    Order-insensitive membership matching: duplicates are kept, so an item
    occurring a times in l1 and b times in l2 contributes a*b entries.
    """
    shared = []
    for wanted in l1:
        shared += [element for element in l2 if element == wanted]
    return shared
# For every subject, count how many (path-after-undo, path-before-undo) comparisons share
# no city at all versus at least one city, and how many share exactly 0, 1, 2, 3, 4 or more
# than 4 cities. get_overlaplist here is the two-argument version defined just above,
# which returns the matching cities regardless of order.
overlap_seq_inorder = []
overlap_seq_inorder_inlen=[]
# The set of puzzle IDs does not change inside the loops, so compute it once.
puzzle_ids = np.unique(sc_data_choice_level['puzzleID'])
for sub in range(100):
    dat_sbj = sc_data_choice_level[sc_data_choice_level['subjects']==sub].sort_values(["puzzleID","index"])
    overlap_seq_inorder_for_puzzle = []
    overlap_seq_inorder_inlen_for_puzzle=[]
    for pzi in puzzle_ids:
        dat_sbj_pzi = dat_sbj[dat_sbj['puzzleID'] == pzi].reset_index()
        # Row positions flagged as the first / last step of an undo sequence.
        firstUndo_idx = dat_sbj_pzi[dat_sbj_pzi["firstUndo"]==1].index
        path_bf_undo = dat_sbj_pzi["currMas"][firstUndo_idx-1] # the mas of the state before undo
        lastUndo_idx = dat_sbj_pzi[dat_sbj_pzi["lastUndo"]==1].index
        path_af_undo = dat_sbj_pzi["currMas"][lastUndo_idx+1] # the mas of the state after undo
        # Index of the last row of this puzzle. pd.Index is used instead of
        # pd.Int64Index, which no longer exists in pandas >= 2.0.
        submit_idx = pd.Index([len(dat_sbj_pzi)-1])
        if len(firstUndo_idx)>0:
            seq_bf = dat_sbj_pzi["path"][firstUndo_idx-1]
            seq_im = dat_sbj_pzi["path"][lastUndo_idx]
            if len(firstUndo_idx)==1:
                # Single undo sequence: the "after" path is the last row.
                seq_af = dat_sbj_pzi["path"][submit_idx]
            else:
                # Several undo sequences: the "after" path of each one is the row just
                # before the next undo sequence starts, and the last row for the final one.
                t_idx = (firstUndo_idx[1:]-1).to_list()
                t_idx.append(submit_idx.item())
                seq_af = dat_sbj_pzi["path"][t_idx]
            # Parse the "path" strings (of the form "[a, b, c]") into lists of ints, and
            # also build space-separated string versions.
            seq_im = seq_im.reset_index()
            pathStr_im = [seq_im.loc[i].path.strip('[').strip(']') for i in range(len(seq_im))]
            pathList_im = [[int(i) for i in pathStr_im[j].split(', ')] for j in range(len(pathStr_im))]
            pathStr_im = [" ".join([str(a) for a in pathList_im[j]]) for j in range(len(seq_im))]
            seq_bf = seq_bf.reset_index()
            pathStr_bf = [seq_bf.loc[i].path.strip('[').strip(']') for i in range(len(seq_bf))]
            pathList_bf = [[int(i) for i in pathStr_bf[j].split(', ')] for j in range(len(pathStr_bf))]
            pathStr_bf = [" ".join([str(a) for a in pathList_bf[j]]) for j in range(len(seq_bf))]
            seq_af = seq_af.reset_index()
            pathStr_af = [seq_af.loc[i].path.strip('[').strip(']') for i in range(len(seq_af))]
            pathList_af = [[int(i) for i in pathStr_af[j].split(', ')] for j in range(len(pathStr_af))]
            pathStr_af = [" ".join([str(a) for a in pathList_af[j]]) for j in range(len(seq_af))]
            # Cities present in the before/after path but not in the path at the last undo step.
            pathList_bfim = [np.setdiff1d(np.array(pathList_bf[i]),np.array(pathList_im[i])).tolist() for i in range(len(pathList_bf))]
            pathStr_bfim = [" ".join([str(a) for a in pathList_bfim[j]]) for j in range(len(pathList_im))]
            pathList_afim = [np.setdiff1d(np.array(pathList_af[i]),np.array(pathList_im[i])).tolist() for i in range(len(pathList_af))]
            pathStr_afim = [" ".join([str(a) for a in pathList_afim[j]]) for j in range(len(pathList_im))]
            # Compare each "after" list with its own and every earlier "before" list.
            seq_inorder = [get_overlaplist(pathList_afim[i], pathList_bfim[j]) for i in range(len(seq_af)) for j in range(i+1)]
            len_seq_inorder = [len(s) for s in seq_inorder]
            n_shared = np.array(len_seq_inorder)
            overlap_seq_inorder_for_puzzle.append([np.sum(n_shared==0), np.sum(n_shared!=0)])
            overlap_seq_inorder_inlen_for_puzzle.append([np.sum(n_shared==0), np.sum(n_shared==1),
                                                         np.sum(n_shared==2), np.sum(n_shared==3),
                                                         np.sum(n_shared==4), np.sum(n_shared>4)])
        else:
            # No undo in this puzzle: NaN placeholders, ignored by nansum below.
            overlap_seq_inorder_for_puzzle.append([np.nan,np.nan])
            overlap_seq_inorder_inlen_for_puzzle.append([np.nan,np.nan,np.nan,np.nan,np.nan,np.nan])
    # Sum the per-puzzle counts of this subject, skipping the NaN placeholders.
    overlap_seq_inorder_for_puzzle = np.array(overlap_seq_inorder_for_puzzle)
    temp = overlap_seq_inorder_for_puzzle.copy()
    overlap_seq_inorder.append(np.nansum(temp,axis=0))
    overlap_seq_inorder_inlen_for_puzzle=np.array(overlap_seq_inorder_inlen_for_puzzle)
    temp = overlap_seq_inorder_inlen_for_puzzle.copy()
    overlap_seq_inorder_inlen.append(np.nansum(temp,axis=0))
overlap_seq_inorder = np.array(overlap_seq_inorder)
overlap_seq_inorder_inlen = np.array(overlap_seq_inorder_inlen)
# Build "minimum overlap length 2" variants of the overlap counts and row-normalise every table.
# Drop column 1 (the count of comparisons sharing exactly one city): np.setdiff1d(range(ncols), 1)
# is every column index except 1.
overlap_seq_inorder_inlen_minlen2 = overlap_seq_inorder_inlen.copy()
overlap_seq_inorder_inlen_minlen2 = overlap_seq_inorder_inlen_minlen2[:,np.setdiff1d(range(overlap_seq_inorder_inlen_minlen2.shape[1]),1)]
# Two-column summary: column 0 versus the sum of the remaining columns.
overlap_seq_inorder_minlen2 = overlap_seq_inorder_inlen_minlen2.copy()
overlap_seq_inorder_minlen2 = np.array([overlap_seq_inorder_minlen2[:,0], np.sum(overlap_seq_inorder_minlen2[:,1:],axis=1) ]).transpose()
# Row-normalise to proportions. A row whose sum is 0 divides 0/0 and becomes NaN.
overlap_seq_inorder_p = overlap_seq_inorder/np.sum(overlap_seq_inorder,axis=1)[:,None]
overlap_seq_inorder_inlen_p = overlap_seq_inorder_inlen/np.sum(overlap_seq_inorder_inlen,axis=1)[:,None]
overlap_seq_inorder_minlen2_p = overlap_seq_inorder_minlen2/np.sum(overlap_seq_inorder_minlen2,axis=1)[:,None]
overlap_seq_inorder_inlen_minlen2_p = overlap_seq_inorder_inlen_minlen2/np.sum(overlap_seq_inorder_inlen_minlen2,axis=1)[:,None]
/Users/dongjaekim/opt/anaconda3/envs/base37/lib/python3.7/site-packages/ipykernel_launcher.py:6: RuntimeWarning: invalid value encountered in true_divide /Users/dongjaekim/opt/anaconda3/envs/base37/lib/python3.7/site-packages/ipykernel_launcher.py:7: RuntimeWarning: invalid value encountered in true_divide import sys /Users/dongjaekim/opt/anaconda3/envs/base37/lib/python3.7/site-packages/ipykernel_launcher.py:10: RuntimeWarning: invalid value encountered in true_divide # Remove the CWD from sys.path while we load stuff. /Users/dongjaekim/opt/anaconda3/envs/base37/lib/python3.7/site-packages/ipykernel_launcher.py:11: RuntimeWarning: invalid value encountered in true_divide # This is added back by InteractiveShellApp.init_path()
# Column-wise means of the four proportion tables, ignoring NaN rows.
print(np.nanmean(overlap_seq_inorder_p,axis=0))
print(np.nanmean(overlap_seq_inorder_inlen_p,axis=0))
print(np.nanmean(overlap_seq_inorder_minlen2_p,axis=0))
print(np.nanmean(overlap_seq_inorder_inlen_minlen2_p,axis=0))
[0.35854577 0.64145423] [0.35854577 0.21940371 0.08219769 0.08340843 0.09223434 0.16421005] [0.45599214 0.54400786] [0.45599214 0.11219922 0.10755462 0.11707653 0.20717749]
# Bar plot of the mean proportion of comparisons with no shared city versus at least one shared city.
%matplotlib notebook
mpl.rcParams['font.family'] = 'Arial'
mpl.rcParams['font.size'] = 11
plt.figure(figsize=(6,4.5))
# Error bars: NaN-ignoring standard deviation divided by sqrt(number of rows).
# NOTE(review): shape[0] includes NaN rows - confirm the intended n for the standard error.
plt.bar(range(2), np.nanmean(overlap_seq_inorder_p,axis=0),
color=[.7,.7,.7], edgecolor = 'k',
yerr=np.nanstd(overlap_seq_inorder_p,axis = 0)/np.sqrt(overlap_seq_inorder_p.shape[0]))
plt.xticks([0,1], ['totally different','has some cities that overlaps'])
plt.xlabel('comparison path before and after undoing')
Text(0.5, 0, 'comparison path before and after undoing')
# Bar plot of the mean proportion of comparisons by number of shared cities (0, 1, 2, 3, 4, >=5).
%matplotlib notebook
plt.figure()
# Error bars: NaN-ignoring standard deviation divided by sqrt(number of rows).
# NOTE(review): shape[0] includes NaN rows - confirm the intended n for the standard error.
plt.bar(range(6), np.nanmean(overlap_seq_inorder_inlen_p,axis=0),
color=[.7,.7,.7], edgecolor = 'k',
yerr=np.nanstd(overlap_seq_inorder_inlen_p,axis = 0)/np.sqrt(overlap_seq_inorder_inlen_p.shape[0]))
plt.xticks(range(6), ['totally \ndifferent','len=1','len=2','len=3'
,'len=4','len>=5'])
plt.xlabel('comparison path before and after undoing')
Text(0.5, 0, 'comparison path before and after undoing')
# Pearson correlation between column 0 of currmas_minlen2_p and column 0 of
# undo_zigzag_diff_minlen2_p, using only the rows where both values are not NaN.
# currmas_minlen2_p is not defined in this part of the file; it must already exist in the session.
common_idx = np.intersect1d(np.where(~np.isnan(currmas_minlen2_p[:,0]))[0], np.where(~np.isnan(undo_zigzag_diff_minlen2_p[:,0]))[0])
from scipy.stats import pearsonr
print(pearsonr(currmas_minlen2_p[common_idx,0], undo_zigzag_diff_minlen2_p[common_idx,0]))
(0.10207177016241915, 0.38027585152785426)